High-level ArrayStream Implementation#

class Array(obj, schema=None, device=None)#

High-level in-memory Array representation

The Array is nanoarrow’s high-level in-memory array representation whose scope maps to that of a fully-consumed ArrowArrayStream in the Arrow C Stream interface.

The Array class is nanoarrow’s high-level in-memory array representation, encompassing the role of PyArrow’s Array, ChunkedArray, RecordBatch, and Table. This scope maps to that of a fully-consumed ArrowArrayStream as represented by the Arrow C Stream interface.

Note that an Array is not necessarily contiguous in memory (i.e., it may consist of zero or more ``ArrowArray``s).

Parameters#

obj (array or array stream-like): An array-like or array stream-like object as sanitized by c_array_stream().
schema (schema-like, optional): An optional schema, passed to c_array_stream().
device (Device, optional): The device associated with the buffers held by this Array. Defaults to the CPU device.

Examples#

>>> import nanoarrow as na
>>> na.Array([1, 2, 3], na.int32())
nanoarrow.Array<int32>[3]
1
2
3

buffer(i: int) → CBufferView#

Access a single buffer of a contiguous array

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array.buffer(1)
nanoarrow.c_buffer.CBufferView(int32[12 b] 1 2 3)

property buffers: Tuple[CBufferView, ...]#

Access buffers of a contiguous array.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for buffer in array.buffers:
...     print(buffer)
nanoarrow.c_buffer.CBufferView(bool[0 b] )
nanoarrow.c_buffer.CBufferView(int32[12 b] 1 2 3)

child(i: int)#

Borrow a child Array from its parent.

Parameters#

i (int): The index of the child to return.

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> batch = pa.record_batch(
...     [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
...     names=["col1", "col2"]
... )
>>> array = na.Array(batch)
>>> array.child(1)
nanoarrow.Array<'col2': string>[3]
'a'
'b'
'c'

chunk(i: int)#

Extract a single contiguous Array from the underlying representation.

Parameters#

i (int): The index of the chunk to extract.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array.chunk(0)
nanoarrow.Array<int32>[3]
1
2
3

property device: Device#

Get the device on which the buffers for this array are allocated

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array.device
<nanoarrow.device.Device>
- device_type: CPU <1>
- device_id: -1

static from_chunks(obj: Iterable, schema=None, validate: bool = True)#

Create an Array with explicit chunks

Creates an Array with explicit chunking from an iterable of objects that can be converted to a c_array().

Parameters#

obj (iterable of array-like): An iterable of objects that can be passed to c_array().
schema (schema-like, optional): An optional schema. If present, will be passed to c_array() for each item in obj; if not present it will be inferred from the first chunk.
validate (bool): Use False to opt out of validation steps performed when constructing this array.

Examples#

>>> import nanoarrow as na
>>> na.Array.from_chunks([[1, 2, 3], [4, 5, 6]], na.int32())
nanoarrow.Array<int32>[6]
1
2
3
4
5
6

inspect()#: Print the details of the array (type, length, offset, buffers, and child arrays).

iter_children() → Iterable#

Iterate over children of this Array

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> batch = pa.record_batch(
...     [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
...     names=["col1", "col2"]
... )
>>> array = na.Array(batch)
>>> for child in array.iter_children():
...     print(child)
nanoarrow.Array<'col1': int64>[3]
1
2
3
nanoarrow.Array<'col2': string>[3]
'a'
'b'
'c'

iter_chunk_views() → Iterable[CArrayView]#

Iterate over prepared views of each chunk

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for view in array.iter_chunk_views():
...     offset, length = view.offset, len(view)
...     validity, data = view.buffers
...     print(offset, length)
...     print(validity)
...     print(data)
0 3
nanoarrow.c_buffer.CBufferView(bool[0 b] )
nanoarrow.c_buffer.CBufferView(int32[12 b] 1 2 3)

iter_chunks() → Iterable#

Iterate over Arrays in the underlying representation whose buffers are contiguous in memory.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for chunk in array.iter_chunks():
...     print(chunk)
nanoarrow.Array<int32>[3]
1
2
3

iter_py() → Iterable#

Iterate over the default Python representation of each element.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for item in array.iter_py():
...     print(item)
1
2
3

iter_scalar() → Iterable[Scalar]#

Iterate over items as Scalars

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for item in array.iter_scalar():
...     print(item)
Scalar<int32> 1
Scalar<int32> 2
Scalar<int32> 3

iter_tuples() → Iterable[Tuple]#

Iterate over rows of a struct array as tuples.

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> batch = pa.record_batch(
...     [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
...     names=["col1", "col2"]
... )
>>> array = na.Array(batch)
>>> for item in array.iter_tuples():
...     print(item)
(1, 'a')
(2, 'b')
(3, 'c')

property n_buffers: int#

Get the number of buffers in each chunk of this Array

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array.n_buffers
2

property n_children: int#

Get the number of children for an Array of this type.

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> batch = pa.record_batch(
...     [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
...     names=["col1", "col2"]
... )
>>> array = na.Array(batch)
>>> array.n_children
2

property n_chunks: int#

Get the number of chunks in the underlying representation of this Array.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array.n_chunks
1

property offset: int#

Access the logical offset of a contiguous array

Examples#

>>> import nanoarrow as na
>>> c_array = na.c_array([1, 2, 3], na.int32())
>>> na.Array(c_array[1:]).offset
1

property schema: Schema#: Get the schema (data type) of this Array

serialize(dst=None) → bytes | None#

Write this Array into dst as zero or more encapsulated IPC messages

Parameters#

dst (file-like, optional): If present, a file-like object into which the chunks of this array should be serialized. If omitted, this will create an io.BytesIO() and return the serialized result.

class Scalar#

Generic wrapper around an Array element

This class exists to provide a generic implementation of array-like indexing for the Array. These objects can currently only be created by extracting an element from an Array.

Note that it is rarely efficient to iterate over Scalar objects: use the iterators in nanoarrow.iterator to more effectively iterate over an Array.

Examples#

>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> array[0]
Scalar<int32> 1
>>> array[0].as_py()
1
>>> array[0].schema
<Schema> int32

as_py()#: Get the Python object representation of this scalar

property schema: Schema#: Get the schema (data type) of this scalar

array(obj, schema=None) → Array#: Alias for the Array class constructor. The use of nanoarrow.Array() is preferred over nanoarrow.array().