Python API reference#

Python bindings to the nanoarrow C library

EXPERIMENTAL

The nanoarrow Python package provides bindings to the nanoarrow C library. Like the nanoarrow C library, it provides tools to facilitate the use of the Arrow C Data and Arrow C Stream interfaces.

class Schema(obj, *, name=None, nullable=None, **params)#

The Schema is nanoarrow’s high-level data type representation whose scope maps to that of the ArrowSchema in the Arrow C Data interface. See schema() for class details.

property byte_width: int | None#

Element byte width for fixed-size binary type

Returns None for types for which this property is not relevant.

>>> import nanoarrow as na
>>> na.fixed_size_binary(123).byte_width
123
field(i)#

Extract a child Schema

>>> import nanoarrow as na
>>> schema = na.struct({"col1": na.int32()})
>>> schema.field(0)
Schema(INT32, name='col1')
property fields#

Iterate over child Schemas

>>> import nanoarrow as na
>>> schema = na.struct({"col1": na.int32()})
>>> for field in schema.fields:
...     print(field.name)
...
col1
property n_fields: int#

Number of child Schemas

>>> import nanoarrow as na
>>> schema = na.struct({"col1": na.int32()})
>>> schema.n_fields
1
property name: str | None#

Field name of this Schema

>>> import nanoarrow as na
>>> schema = na.struct({"col1": na.int32()})
>>> schema.field(0).name
'col1'
property nullable: bool#

Nullability of this field

>>> import nanoarrow as na
>>> na.int32().nullable
True
>>> na.int32(nullable=False).nullable
False
property precision: int#

Decimal precision

>>> import nanoarrow as na
>>> na.decimal128(10, 3).precision
10
property scale: int#

Decimal scale

>>> import nanoarrow as na
>>> na.decimal128(10, 3).scale
3
property timezone: str | None#

Timezone for timestamp types

Returns None for types for which this property is not relevant or for timestamp types for which the timezone is not set.

>>> import nanoarrow as na
>>> na.timestamp(na.TimeUnit.SECOND, timezone="America/Halifax").timezone
'America/Halifax'
property type: Type#

Type enumerator value of this Schema

>>> import nanoarrow as na
>>> na.int32().type
<Type.INT32: 8>
property unit: TimeUnit | None#

TimeUnit for timestamp, time, and duration types

Returns None for types for which this property is not relevant.

>>> import nanoarrow as na
>>> na.timestamp(na.TimeUnit.SECOND).unit
<TimeUnit.SECOND: 0>
class TimeUnit(value)#

Unit enumerator for timestamp, duration, and time types.

static create(obj)#

Create a TimeUnit from parameter input.

This constructor will accept the abbreviations “s”, “ms”, “us”, and “ns” and return the appropriate enumerator value.

>>> import nanoarrow as na
>>> na.TimeUnit.create("s")
<TimeUnit.SECOND: 0>
class Type(value)#

The Type enumerator provides a means by which the various type categories can be identified. Type values can be used in place of Schema instances in most places for parameter-free types.

allocate_c_array(requested_schema=None)#

Allocate an uninitialized ArrowArray

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> array = na.allocate_c_array()
>>> pa.array([1, 2, 3])._export_to_c(array._addr())
allocate_c_array_stream()#

Allocate an uninitialized ArrowArrayStream wrapper

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> pa_column = pa.array([1, 2, 3], pa.int32())
>>> pa_batch = pa.record_batch([pa_column], names=["col1"])
>>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, [pa_batch])
>>> array_stream = na.allocate_c_array_stream()
>>> pa_reader._export_to_c(array_stream._addr())
allocate_c_schema()#

Allocate an uninitialized ArrowSchema wrapper

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> schema = na.allocate_c_schema()
>>> pa.int32()._export_to_c(schema._addr())
binary(nullable: bool = True) Schema#

Create an instance of a variable-length binary type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.binary()
Schema(BINARY)
bool(nullable: bool = True) Schema#

Create an instance of a boolean type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.bool()
Schema(BOOL)
c_array(obj=None, requested_schema=None) CArray#

ArrowArray wrapper

This class provides a user-facing interface to access the fields of an ArrowArray as defined in the Arrow C Data interface, holding an optional reference to a CSchema that can be used to safely deserialize the content.

These objects are created using c_array(), which accepts any array-like object according to the Arrow PyCapsule interface.

This Python wrapper allows access to array fields but does not automatically deserialize their content: use c_array_view() to validate and deserialize the content into a more easily inspectable object.

Note that the CArray objects returned by .child() hold strong references to the original ArrowArray to avoid copies while inspecting an imported structure.

Examples#

>>> import pyarrow as pa
>>> import numpy as np
>>> import nanoarrow as na
>>> array = na.c_array(pa.array(["one", "two", "three", None]))
>>> array.length
4
>>> array.null_count
1
c_array_stream(obj=None, requested_schema=None) CArrayStream#

ArrowArrayStream wrapper

This class provides a user-facing interface to access the fields of an ArrowArrayStream as defined in the Arrow C Stream interface. These objects are usually created using nanoarrow.c_array_stream().

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> pa_column = pa.array([1, 2, 3], pa.int32())
>>> pa_batch = pa.record_batch([pa_column], names=["col1"])
>>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, [pa_batch])
>>> array_stream = na.c_array_stream(pa_reader)
>>> array_stream.get_schema()
<nanoarrow.c_lib.CSchema struct>
- format: '+s'
- name: ''
- flags: 0
- metadata: NULL
- dictionary: NULL
- children[1]:
  'col1': <nanoarrow.c_lib.CSchema int32>
    - format: 'i'
    - name: 'col1'
    - flags: 2
    - metadata: NULL
    - dictionary: NULL
    - children[0]:
>>> array_stream.get_next().length
3
>>> array_stream.get_next() is None
Traceback (most recent call last):
  ...
StopIteration
c_array_view(obj, requested_schema=None) CArrayView#

ArrowArrayView wrapper

The ArrowArrayView is a nanoarrow C library structure that provides structured access to buffer addresses, buffer sizes, and buffer data types. The buffer data is usually propagated from an ArrowArray but can also be propagated from other types of objects (e.g., serialized IPC). The offset and length of this view are independent of its parent (i.e., this object can also represent a slice of its parent).

Examples#

>>> import pyarrow as pa
>>> import numpy as np
>>> import nanoarrow as na
>>> array = na.c_array(pa.array(["one", "two", "three", None]))
>>> array_view = na.c_array_view(array)
>>> np.array(array_view.buffer(1))
array([ 0,  3,  6, 11, 11], dtype=int32)
>>> np.array(array_view.buffer(2))
array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
      dtype='|S1')
c_schema(obj=None) CSchema#

ArrowSchema wrapper

The CSchema class provides a Python-friendly interface to access the fields of an ArrowSchema as defined in the Arrow C Data interface. These objects are created using nanoarrow.c_schema(), which accepts any schema or data type-like object according to the Arrow PyCapsule interface.

This Python wrapper allows access to schema struct members but does not automatically deserialize their content: use c_schema_view() to validate and deserialize the content into a more easily inspectable object.

Note that the CSchema objects returned by .child() hold strong references to the original ArrowSchema to avoid copies while inspecting an imported structure.

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> schema = na.c_schema(pa.int32())
>>> schema.is_valid()
True
>>> schema.format
'i'
>>> schema.name
''
c_schema_view(obj) CSchemaView#

ArrowSchemaView wrapper

The ArrowSchemaView is a nanoarrow C library structure that facilitates access to the deserialized content of an ArrowSchema (e.g., parameter values for parameterized types). This wrapper extends that facility to Python.

Examples#

>>> import pyarrow as pa
>>> import nanoarrow as na
>>> schema = na.c_schema(pa.decimal128(10, 3))
>>> schema_view = na.c_schema_view(schema)
>>> schema_view.type
'decimal128'
>>> schema_view.decimal_bitwidth
128
>>> schema_view.decimal_precision
10
>>> schema_view.decimal_scale
3
c_version()#

Return the nanoarrow C library version string

date32(nullable: bool = True) Schema#

Create an instance of a 32-bit date type (days since 1970-01-01).

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.date32()
Schema(DATE32)
date64(nullable: bool = True) Schema#

Create an instance of a 64-bit date type (milliseconds since 1970-01-01).

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.date64()
Schema(DATE64)
decimal128(precision: int, scale: int, nullable: bool = True) Schema#

Create an instance of a 128-bit decimal type.

Parameters#

precisionint

The number of significant digits representable by this type. Must be between 1 and 38.

scaleint

The number of digits after the decimal point for values of this type.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.decimal128(10, 3)
Schema(DECIMAL128, precision=10, scale=3)
decimal256(precision: int, scale: int, nullable: bool = True) Schema#

Create an instance of a 256-bit decimal type.

Parameters#

precisionint

The number of significant digits representable by this type. Must be between 1 and 76.

scaleint

The number of digits after the decimal point for values of this type.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.decimal256(10, 3)
Schema(DECIMAL256, precision=10, scale=3)
duration(unit, nullable: bool = True)#

Create an instance of a duration type.

Parameters#

unitstr or TimeUnit

The unit of values stored by this type.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.duration("s")
Schema(DURATION, unit=SECOND)
fixed_size_binary(byte_width: int, nullable: bool = True) Schema#

Create an instance of a fixed-width binary type.

Parameters#

byte_widthint

The width of each element in bytes.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.fixed_size_binary(123)
Schema(FIXED_SIZE_BINARY, byte_width=123)
float16(nullable: bool = True) Schema#

Create an instance of a 16-bit floating-point type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.float16()
Schema(HALF_FLOAT)
float32(nullable: bool = True) Schema#

Create an instance of a 32-bit floating-point type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.float32()
Schema(FLOAT)
float64(nullable: bool = True) Schema#

Create an instance of a 64-bit floating-point type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.float64()
Schema(DOUBLE)
int16(nullable: bool = True) Schema#

Create an instance of a signed 16-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.int16()
Schema(INT16)
int32(nullable: bool = True) Schema#

Create an instance of a signed 32-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.int32()
Schema(INT32)
int64(nullable: bool = True) Schema#

Create an instance of a signed 64-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.int64()
Schema(INT64)
int8(nullable: bool = True) Schema#

Create an instance of a signed 8-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.int8()
Schema(INT8)
interval_day_time(nullable: bool = True)#

Create an instance of an interval type measured as a day/time pair.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.interval_day_time()
Schema(INTERVAL_DAY_TIME)
interval_month_day_nano(nullable: bool = True)#

Create an instance of an interval type measured as a month/day/nanosecond tuple.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.interval_month_day_nano()
Schema(INTERVAL_MONTH_DAY_NANO)
interval_months(nullable: bool = True)#

Create an instance of an interval type measured in months.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.interval_months()
Schema(INTERVAL_MONTHS)
large_binary(nullable: bool = True) Schema#

Create an instance of a variable-length binary type that uses 64-bit offsets.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.large_binary()
Schema(LARGE_BINARY)
large_string(nullable: bool = True) Schema#

Create an instance of a variable-length UTF-8 encoded string type that uses 64-bit offsets.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.large_string()
Schema(LARGE_STRING)
null(nullable: bool = True) Schema#

Create an instance of a null type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.null()
Schema(NULL)
schema(obj, *, name=None, nullable=None, **params)#

Create a nanoarrow Schema

The Schema is nanoarrow’s high-level data type representation, encompassing the role of PyArrow’s Schema, Field, and DataType. This scope maps to that of the ArrowSchema in the Arrow C Data interface.

Parameters#

obj :

A Type specifier or a schema-like object. A schema-like object includes:

  • A pyarrow.Schema, pyarrow.Field, or pyarrow.DataType

  • A nanoarrow Schema, CSchema, or Type

  • Any object implementing the Arrow PyCapsule interface protocol method.

namestr, optional

An optional name to bind to this field.

nullablebool, optional

Explicitly specify field nullability. Fields are nullable by default. Only supported if obj is a Type object (for any other input, the nullability is preserved from the passed object).

params :

Type-specific parameters when obj is a Type.

Examples#

>>> import nanoarrow as na
>>> import pyarrow as pa
>>> na.schema(na.Type.INT32)
Schema(INT32)
>>> na.schema(na.Type.DURATION, unit=na.TimeUnit.SECOND)
Schema(DURATION, unit=SECOND)
>>> na.schema(pa.int32())
Schema(INT32)
string(nullable: bool = True) Schema#

Create an instance of a variable-length UTF-8 encoded string type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.string()
Schema(STRING)
struct(fields, nullable=True) Schema#

Create a type representing a named sequence of fields.

Parameters#

fields :
  • A dictionary whose keys are field names and values are schema-like objects

  • An iterable whose items are a schema-like object or a two-tuple of the field name and a schema-like object. If a field name is not specified from the tuple, the field name is inherited from the schema-like object.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.struct([na.int32()])
Schema(STRUCT, fields=[Schema(INT32)])
>>> na.struct([("col1", na.int32())])
Schema(STRUCT, fields=[Schema(INT32, name='col1')])
>>> na.struct({"col1": na.int32()})
Schema(STRUCT, fields=[Schema(INT32, name='col1')])
time32(unit: str | TimeUnit, nullable: bool = True) Schema#

Create an instance of a 32-bit time of day type.

Parameters#

unitstr or TimeUnit

The unit of values stored by this type.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.time32("s")
Schema(TIME32, unit=SECOND)
time64(unit: str | TimeUnit, nullable: bool = True) Schema#

Create an instance of a 64-bit time of day type.

Parameters#

unitstr or TimeUnit

The unit of values stored by this type.

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.time64("us")
Schema(TIME64, unit=MICRO)
timestamp(unit: str | TimeUnit, timezone: str | None = None, nullable: bool = True) Schema#

Create an instance of a timestamp type.

Parameters#

unitstr or TimeUnit

The unit of values stored by this type.

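timezonestr, optional

An optional timezone name (e.g., "America/Halifax"). If omitted, the timezone is not set.

nullablebool, optional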

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.timestamp("s")
Schema(TIMESTAMP, unit=SECOND)
>>> na.timestamp("s", timezone="America/Halifax")
Schema(TIMESTAMP, unit=SECOND, timezone='America/Halifax')
uint16(nullable: bool = True) Schema#

Create an instance of an unsigned 16-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.uint16()
Schema(UINT16)
uint32(nullable: bool = True) Schema#

Create an instance of an unsigned 32-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.uint32()
Schema(UINT32)
uint64(nullable: bool = True) Schema#

Create an instance of an unsigned 64-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.uint64()
Schema(UINT64)
uint8(nullable: bool = True) Schema#

Create an instance of an unsigned 8-bit integer type.

Parameters#

nullablebool, optional

Use False to mark this field as non-nullable.

Examples#

>>> import nanoarrow as na
>>> na.uint8()
Schema(UINT8)