High-level ArrayStream Implementation#

class ArrayStream(obj, schema=None)#

High-level ArrayStream representation

The ArrayStream is nanoarrow’s high-level representation of zero or more contiguous arrays that have not necessarily been materialized. This is in contrast to the nanoarrow Array, which consists of zero or more contiguous arrays but is always fully materialized.

The ArrayStream is similar to pyarrow’s RecordBatchReader except it can also represent streams of non-struct arrays. Its scope maps to that of an ArrowArrayStream as represented by the Arrow C Stream interface.

Parameters#

obj : array or array stream-like

An array-like or array stream-like object as sanitized by c_array_stream().

schema : schema-like, optional

An optional schema, passed to c_array_stream().

Examples#

>>> import nanoarrow as na
>>> na.ArrayStream([1, 2, 3], na.int32())
nanoarrow.ArrayStream<int32>
close() → None#

Release resources associated with this stream

Note that it is usually preferred to use the context manager to ensure prompt release of resources (e.g., open files) associated with this stream.

Examples#

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> with stream:
...     pass
>>> stream.read_all()
Traceback (most recent call last):
...
RuntimeError: array stream is released
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> stream.close()
>>> stream.read_all()
Traceback (most recent call last):
...
RuntimeError: array stream is released
static from_path(obj, *args, **kwargs)#

Create an ArrayStream from an IPC stream at a local file path

Examples#

>>> import tempfile
>>> import os
>>> import nanoarrow as na
>>> from nanoarrow.ipc import InputStream
>>> with tempfile.TemporaryDirectory() as td:
...     path = os.path.join(td, "test.arrows")
...     with open(path, "wb") as f:
...         nbytes = f.write(InputStream.example_bytes())
...
...     with na.ArrayStream.from_path(path) as stream:
...         stream.read_all()
nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
static from_readable(obj)#

Create an ArrayStream from an IPC stream in a readable file or buffer

Examples#

>>> import nanoarrow as na
>>> from nanoarrow.ipc import InputStream
>>> with na.ArrayStream.from_readable(InputStream.example_bytes()) as stream:
...     stream.read_all()
nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
static from_url(obj, *args, **kwargs)#

Create an ArrayStream from an IPC stream at a URL

Examples#

>>> import pathlib
>>> import tempfile
>>> import os
>>> import nanoarrow as na
>>> from nanoarrow.ipc import InputStream
>>> with tempfile.TemporaryDirectory() as td:
...     path = os.path.join(td, "test.arrows")
...     with open(path, "wb") as f:
...         nbytes = f.write(InputStream.example_bytes())
...
...     uri = pathlib.Path(path).as_uri()
...     with na.ArrayStream.from_url(uri) as stream:
...         stream.read_all()
nanoarrow.Array<non-nullable struct<some_col: int32>>[3]
{'some_col': 1}
{'some_col': 2}
{'some_col': 3}
iter_chunks() → Iterable[Array]#

Iterate over contiguous Arrays in this stream

For the ArrayStream, this is the same as iterating over the stream itself.

Examples#

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> for chunk in stream:
...     print(chunk)
nanoarrow.Array<int32>[3]
1
2
3
iter_py() → Iterable#

Iterate over the default Python representation of each element.

Examples#

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> for item in stream.iter_py():
...     print(item)
1
2
3
iter_tuples() → Iterable[Tuple]#

Iterate over rows of a struct stream as tuples

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> batch = pa.record_batch(
...     [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
...     names=["col1", "col2"]
... )
>>> stream = na.ArrayStream(batch)
>>> for item in stream.iter_tuples():
...     print(item)
(1, 'a')
(2, 'b')
(3, 'c')
read_all() → Array#

Materialize the entire stream into an Array

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> stream.read_all()
nanoarrow.Array<int32>[3]
1
2
3
read_next() → Array#

Materialize the next contiguous Array in this stream

This method raises StopIteration if there are no more arrays in this stream.

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> stream.read_next()
nanoarrow.Array<int32>[3]
1
2
3
property schema#

The Schema associated with this stream

>>> import nanoarrow as na
>>> stream = na.ArrayStream([1, 2, 3], na.int32())
>>> stream.schema
<Schema> int32