C API Reference#

group nanoarrow

Except where noted, objects are not thread-safe and clients should take care to serialize accesses to methods.

Because this library is intended to be vendored, it provides full type definitions and encourages clients to stack or statically allocate where convenient.

Creating schemas#

group nanoarrow-schema

These functions allocate, copy, and destroy ArrowSchema structures.

Functions

void ArrowSchemaInit(struct ArrowSchema *schema)#

Initialize an ArrowSchema.

Initializes the fields and release callback of schema. The caller is responsible for calling the schema->release callback.

ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema *schema, enum ArrowType type)#

Initialize an ArrowSchema from an ArrowType.

A convenience constructor that calls ArrowSchemaInit() and ArrowSchemaSetType() for the common case of constructing an unparameterized type. The caller is responsible for calling the schema->release callback if NANOARROW_OK is returned.

int64_t ArrowSchemaToString(const struct ArrowSchema *schema, char *out, int64_t n, char recursive)#

Get a human-readable summary of a Schema.

Writes a summary of an ArrowSchema to out (up to n - 1 characters) and returns the number of characters required for the output if n were sufficiently large. If recursive is non-zero, the result will also include children.

ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema *schema, enum ArrowType type)#

Set the format field of a schema from an ArrowType.

Initializes the fields and release callback of schema. For NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and NANOARROW_TYPE_MAP, the appropriate number of children are allocated, initialized, and named; however, the caller must call ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema *schema, int64_t n_children)#

Set the format field and initialize children of a struct schema.

The specified number of children are initialized; however, the caller is responsible for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema *schema, enum ArrowType type, int32_t fixed_size)#

Set the format field of a fixed-size schema.

Returns EINVAL for fixed_size <= 0 or for type that is not NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are allocated, initialized, and named; however, the caller must call ArrowSchemaSetType() on the first child. Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema *schema, enum ArrowType type, int32_t decimal_precision, int32_t decimal_scale)#

Set the format field of a decimal schema.

Returns EINVAL for scale <= 0 or for type that is not NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema *schema, enum ArrowType type, enum ArrowTimeUnit time_unit, const char *timezone)#

Set the format field of a time, timestamp, or duration schema.

Returns EINVAL for type that is not NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The timezone parameter must be NULL for a non-timestamp type. Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema *schema, enum ArrowType type, int64_t n_children)#

Set the format field of a union schema.

Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are allocated and initialized.

ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema *schema, struct ArrowSchema *schema_out)#

Make a (recursive) copy of a schema.

Allocates and copies fields of schema into schema_out.

ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema *schema, const char *format)#

Copy format into schema->format.

schema must have been allocated using ArrowSchemaInitFromType() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema *schema, const char *name)#

Copy name into schema->name.

schema must have been allocated using ArrowSchemaInitFromType() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema *schema, const char *metadata)#

Copy metadata into schema->metadata.

schema must have been allocated using ArrowSchemaInitFromType() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema *schema, int64_t n_children)#

Allocate the schema->children array.

Includes the memory for each child struct ArrowSchema. schema must have been allocated using ArrowSchemaInitFromType() or ArrowSchemaDeepCopy().

ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema *schema)#

Allocate the schema->dictionary member.

schema must have been allocated using ArrowSchemaInitFromType() or ArrowSchemaDeepCopy().

Reading schemas#

group nanoarrow-schema-view

Defines

NANOARROW_FLAG_ALL_SUPPORTED#

Flags supported by ArrowSchemaViewInit()

Functions

ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView *schema_view, const struct ArrowSchema *schema, struct ArrowError *error)#

Initialize an ArrowSchemaView.

struct ArrowSchemaView#
#include <nanoarrow.h>

A non-owning view of a parsed ArrowSchema.

Contains more readily extractable values than a raw ArrowSchema. Clients can stack or statically allocate this structure but are encouraged to use the provided getters to ensure forward compatibility.

Public Members

const struct ArrowSchema *schema#

A pointer to the schema represented by this view.

enum ArrowType type#

The data type represented by the schema.

This value may be NANOARROW_TYPE_DICTIONARY if the schema has a non-null dictionary member; datetime types are valid values. This value will never be NANOARROW_TYPE_EXTENSION (see extension_name and/or extension_metadata to check for an extension type).

enum ArrowType storage_type#

The storage data type represented by the schema.

This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION or any datetime type. This value represents only the type required to interpret the buffers in the array.

struct ArrowLayout layout#

The storage layout represented by the schema.

struct ArrowStringView extension_name#

The extension type name if it exists.

If the ARROW:extension:name key is present in schema.metadata, extension_name.data will be non-NULL.

struct ArrowStringView extension_metadata#

The extension type metadata if it exists.

If the ARROW:extension:metadata key is present in schema.metadata, extension_metadata.data will be non-NULL.

int32_t fixed_size#

Format fixed size parameter.

This value is set when parsing a fixed-size binary or fixed-size list schema; this value is undefined for other types. For a fixed-size binary schema this value is in bytes; for a fixed-size list schema this value refers to the number of child elements for each element of the parent.

int32_t decimal_bitwidth#

Decimal bitwidth.

This value is set when parsing a decimal type schema; this value is undefined for other types.

int32_t decimal_precision#

Decimal precision.

This value is set when parsing a decimal type schema; this value is undefined for other types.

int32_t decimal_scale#

Decimal scale.

This value is set when parsing a decimal type schema; this value is undefined for other types.

enum ArrowTimeUnit time_unit#

Format time unit parameter.

This value is set when parsing a date/time type. The value is undefined for other types.

const char *timezone#

Format timezone parameter.

This value is set when parsing a timestamp type and represents the timezone format parameter. This value points to data within the schema and is undefined for other types.

const char *union_type_ids#

Union type ids parameter.

This value is set when parsing a union type and represents the type ids parameter. This value points to data within the schema and is undefined for other types.

Creating arrays#

group nanoarrow-array

These functions allocate, copy, and destroy ArrowArray structures. Once an ArrowArray has been initialized via ArrowArrayInitFromType() or ArrowArrayInitFromSchema(), the caller is responsible for releasing it using the embedded release callback.

Enums

enum ArrowValidationLevel#

Validation level enumerator.

Values:

enumerator NANOARROW_VALIDATION_LEVEL_NONE#

Do not validate buffer sizes or content.

enumerator NANOARROW_VALIDATION_LEVEL_MINIMAL#

Validate buffer sizes that depend on array length but do not validate buffer sizes that depend on buffer data access.

enumerator NANOARROW_VALIDATION_LEVEL_DEFAULT#

Validate all buffer sizes, including those that require buffer data access, but do not perform any checks that are O(n) along the length of the buffers.

enumerator NANOARROW_VALIDATION_LEVEL_FULL#

Validate all buffer sizes and all buffer content. This is useful in the context of untrusted input or input that may have been corrupted in transit.

Functions

ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray *array, enum ArrowType storage_type)#

Initialize the fields of an array.

Initializes the fields and release callback of array. Caller is responsible for calling the array->release callback if NANOARROW_OK is returned.

ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray *array, const struct ArrowSchema *schema, struct ArrowError *error)#

Initialize the contents of an ArrowArray from an ArrowSchema.

Caller is responsible for calling the array->release callback if NANOARROW_OK is returned.

ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray *array, const struct ArrowArrayView *array_view, struct ArrowError *error)#

Initialize the contents of an ArrowArray from an ArrowArrayView.

Caller is responsible for calling the array->release callback if NANOARROW_OK is returned.

ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray *array, int64_t n_children)#

Allocate the array->children array.

Includes the memory for each child struct ArrowArray, whose members are marked as released and may be subsequently initialized with ArrowArrayInitFromType() or moved from an existing ArrowArray. array must have been allocated using ArrowArrayInitFromType().

ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray *array)#

Allocate the array->dictionary member.

Includes the memory for the struct ArrowArray, whose contents are marked as released and may be subsequently initialized with ArrowArrayInitFromType() or moved from an existing ArrowArray. array must have been allocated using ArrowArrayInitFromType().

void ArrowArraySetValidityBitmap(struct ArrowArray *array, struct ArrowBitmap *bitmap)#

Set the validity bitmap of an ArrowArray.

array must have been allocated using ArrowArrayInitFromType().

ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray *array, int64_t i, struct ArrowBuffer *buffer)#

Set a buffer of an ArrowArray.

array must have been allocated using ArrowArrayInitFromType().

static inline struct ArrowBitmap *ArrowArrayValidityBitmap(struct ArrowArray *array)#

Get the validity bitmap of an ArrowArray.

array must have been allocated using ArrowArrayInitFromType().

static inline struct ArrowBuffer *ArrowArrayBuffer(struct ArrowArray *array, int64_t i)#

Get a buffer of an ArrowArray.

array must have been allocated using ArrowArrayInitFromType().

static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray *array)#

Start element-wise appending to an ArrowArray.

Initializes any values needed to use ArrowArrayAppend*() functions. All element-wise appenders append by value and return EINVAL if the exact value cannot be represented by the underlying storage type. array must have been allocated using ArrowArrayInitFromType().

ArrowErrorCode ArrowArrayReserve(struct ArrowArray *array, int64_t additional_size_elements)#

Reserve space for future appends.

For buffer sizes that can be calculated (i.e., not string data buffers or child array sizes for non-fixed-size arrays), recursively reserve space for additional elements. This is useful for reducing the number of reallocations that occur using the item-wise appenders.

static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray *array, int64_t n)#

Append a null value to an array.

static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray *array, int64_t n)#

Append an empty, non-null value to an array.

static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray *array, int64_t value)#

Append a signed integer value to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type or EINVAL otherwise (e.g., value is outside the valid array range).

static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray *array, uint64_t value)#

Append an unsigned integer value to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type or EINVAL otherwise (e.g., value is outside the valid array range).

static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray *array, double value)#

Append a double value to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type or EINVAL otherwise (e.g., value is outside the valid array range or there is an attempt to append a non-integer to an array with an integer storage type).

static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray *array, struct ArrowBufferView value)#

Append a string of bytes to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type, EOVERFLOW if appending value would overflow the offset type (e.g., if the data buffer would be larger than 2 GB for a non-large string type), or EINVAL otherwise (e.g., the underlying array is not a binary, string, large binary, large string, or fixed-size binary array, or value is the wrong size for a fixed-size binary array).

static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray *array, struct ArrowStringView value)#

Append a string value to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type, EOVERFLOW if appending value would overflow the offset type (e.g., if the data buffer would be larger than 2 GB for a non-large string type), or EINVAL otherwise (e.g., the underlying array is not a string or large string array).

static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray *array, const struct ArrowInterval *value)#

Append an interval value to an array.

Returns NANOARROW_OK if value can be exactly represented by the underlying storage type or EINVAL otherwise.

static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray *array, const struct ArrowDecimal *value)#

Append a decimal value to an array.

Returns NANOARROW_OK if array is a decimal array with the appropriate bitwidth or EINVAL otherwise.

static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray *array)#

Finish a nested array element.

Appends a non-null element to the array based on the first child’s current length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW if the child of a list or map array would exceed INT_MAX elements, or EINVAL if the underlying storage type is not a struct, list, large list, or fixed-size list, or if there was an attempt to add a struct or fixed-size list element where the length of the child array(s) did not match the expected length.

static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray *array, int8_t type_id)#

Finish a union array element.

Appends an element to the union type ids buffer and increments array->length. For sparse unions, up to one element is added to non type-id children. Returns EINVAL if the underlying storage type is not a union, if type_id is not valid, or if child sizes after appending are inconsistent.

static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray *array)#

Shrink buffer capacity to the size required.

Also applies shrinking to any child arrays. array must have been allocated using ArrowArrayInitFromType().

ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray *array, struct ArrowError *error)#

Finish building an ArrowArray.

Flushes any pointers from internal buffers that may have been reallocated into array->buffers and checks the actual size of the buffers against the expected size based on the final length. array must have been allocated using ArrowArrayInitFromType().

ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray *array, enum ArrowValidationLevel validation_level, struct ArrowError *error)#

Finish building an ArrowArray with explicit validation.

Finish building with an explicit validation level. This could perform less validation (i.e., NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU buffer data access is not possible or more validation (i.e., NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or corruptible source.

Reading arrays#

group nanoarrow-array-view

These functions read and validate the contents of ArrowArray structures.

Defines

NANOARROW_MAX_FIXED_BUFFERS#

The maximum number of buffers in an ArrowArrayView or ArrowLayout.

All currently supported types have 3 buffers or fewer; however, future types may involve a variable number of buffers (e.g., string view). These buffers will be represented by separate members of the ArrowArrayView or ArrowLayout.

Enums

enum ArrowBufferType#

Functional types of buffers as described in the Arrow Columnar Specification.

Values:

enumerator NANOARROW_BUFFER_TYPE_NONE#
enumerator NANOARROW_BUFFER_TYPE_VALIDITY#
enumerator NANOARROW_BUFFER_TYPE_TYPE_ID#
enumerator NANOARROW_BUFFER_TYPE_UNION_OFFSET#
enumerator NANOARROW_BUFFER_TYPE_DATA_OFFSET#
enumerator NANOARROW_BUFFER_TYPE_DATA#

Functions

void ArrowArrayViewInitFromType(struct ArrowArrayView *array_view, enum ArrowType storage_type)#

Initialize the contents of an ArrowArrayView.

static inline void ArrowArrayViewMove(struct ArrowArrayView *src, struct ArrowArrayView *dst)#

Move an ArrowArrayView.

Transfers the ArrowArrayView data and lifecycle management to another address and resets the contents of src.

ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView *array_view, const struct ArrowSchema *schema, struct ArrowError *error)#

Initialize the contents of an ArrowArrayView from an ArrowSchema.

ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView *array_view, int64_t n_children)#

Allocate the array_view->children array.

Includes the memory for each child struct ArrowArrayView.

ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView *array_view)#

Allocate array_view->dictionary.

void ArrowArrayViewSetLength(struct ArrowArrayView *array_view, int64_t length)#

Set data-independent buffer sizes from length.

ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView *array_view, const struct ArrowArray *array, struct ArrowError *error)#

Set buffer sizes and data pointers from an ArrowArray.

ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView *array_view, const struct ArrowArray *array, struct ArrowError *error)#

Set buffer sizes and data pointers from an ArrowArray except for those that require dereferencing buffer content.

ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView *array_view, enum ArrowValidationLevel validation_level, struct ArrowError *error)#

Performs checks on the content of an ArrowArrayView.

If using ArrowArrayViewSetArray() to back array_view with an ArrowArray, the buffer sizes and some content (first and last offset) have already been validated at the “default” level. If setting the buffer pointers and sizes otherwise, you may wish to perform checks at a different level. See documentation for ArrowValidationLevel for the details of checks performed at each level.

void ArrowArrayViewReset(struct ArrowArrayView *array_view)#

Reset the contents of an ArrowArrayView and free resources.

static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView *array_view, int64_t i)#

Check for a null element in an ArrowArrayView.

static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView *array_view, int64_t i)#

Get the type id of a union array element.

static inline int8_t ArrowArrayViewUnionChildIndex(const struct ArrowArrayView *array_view, int64_t i)#

Get the child index of a union array element.

static inline int64_t ArrowArrayViewUnionChildOffset(const struct ArrowArrayView *array_view, int64_t i)#

Get the index to use into the relevant union child array.

static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView *array_view, int64_t i)#

Get an element in an ArrowArrayView as an integer.

This function does not check for null values, that values are actually integers, or that values are within a valid range for an int64.

static inline uint64_t ArrowArrayViewGetUIntUnsafe(const struct ArrowArrayView *array_view, int64_t i)#

Get an element in an ArrowArrayView as an unsigned integer.

This function does not check for null values, that values are actually integers, or that values are within a valid range for a uint64.

static inline double ArrowArrayViewGetDoubleUnsafe(const struct ArrowArrayView *array_view, int64_t i)#

Get an element in an ArrowArrayView as a double.

This function does not check for null values, or that values are within a valid range for a double.

static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(const struct ArrowArrayView *array_view, int64_t i)#

Get an element in an ArrowArrayView as an ArrowStringView.

This function does not check for null values.

static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(const struct ArrowArrayView *array_view, int64_t i)#

Get an element in an ArrowArrayView as an ArrowBufferView.

This function does not check for null values.

static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView *array_view, int64_t i, struct ArrowDecimal *out)#

Get an element in an ArrowArrayView as an ArrowDecimal.

This function does not check for null values. The out parameter must be initialized with ArrowDecimalInit() with the proper parameters for this type before calling this for the first time.

struct ArrowArrayView#
#include <nanoarrow_types.h>

A non-owning view of an ArrowArray.

This data structure provides access to the values contained within an ArrowArray with fields provided in a more readily extractable form. You can re-use an ArrowArrayView for multiple ArrowArrays with the same storage type, use it to represent a hypothetical ArrowArray that does not exist yet, or use it to validate the buffers of a future ArrowArray.

Public Members

const struct ArrowArray *array#

The underlying ArrowArray or NULL if it has not been set or if the buffers in this ArrowArrayView are not backed by an ArrowArray.

int64_t offset#

The number of elements from the physical start of the buffers.

int64_t length#

The number of elements in this view.

int64_t null_count#

A cached null count or -1 to indicate that this value is unknown.

enum ArrowType storage_type#

The type used to store values in this array.

This type represents only the minimum required information to extract values from the array buffers (e.g., for a Date32 array, this value will be NANOARROW_TYPE_INT32). For dictionary-encoded arrays, this will be the index type.

struct ArrowLayout layout#

The buffer types, strides, and sizes of this Array’s buffers.

struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS]#

This Array’s buffers as ArrowBufferView objects.

int64_t n_children#

The number of children of this view.

struct ArrowArrayView **children#

Pointers to views of this array’s children.

struct ArrowArrayView *dictionary#

Pointer to a view of this array’s dictionary.

int8_t *union_type_id_map#

Union type id to child index mapping.

If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer such that child_index == union_type_id_map[type_id] and type_id == union_type_id_map[128 + child_index]. This value may be NULL in the case where child_index == type_id.

Owning, growable buffers#

group nanoarrow-buffer

Functions

static inline void ArrowBufferInit(struct ArrowBuffer *buffer)#

Initialize an ArrowBuffer.

Initialize a buffer with a NULL, zero-size buffer using the default buffer allocator.

static inline ArrowErrorCode ArrowBufferSetAllocator(struct ArrowBuffer *buffer, struct ArrowBufferAllocator allocator)#

Set a newly-initialized buffer’s allocator.

Returns EINVAL if the buffer has already been allocated.

static inline void ArrowBufferReset(struct ArrowBuffer *buffer)#

Reset an ArrowBuffer.

Releases the buffer using the allocator’s free method if the buffer’s data member is non-null, sets the data member to NULL, and sets the buffer’s size and capacity to 0.

static inline void ArrowBufferMove(struct ArrowBuffer *src, struct ArrowBuffer *dst)#

Move an ArrowBuffer.

Transfers the buffer data and lifecycle management to another address and resets src.

static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer *buffer, int64_t new_size_bytes, char shrink_to_fit)#

Grow or shrink a buffer to a given size.

When shrinking the size of the buffer, the buffer is only reallocated if shrink_to_fit is non-zero.

static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer *buffer, int64_t additional_size_bytes)#

Ensure a buffer has at least a given additional capacity.

Ensures that the buffer has space to append at least additional_size_bytes, overallocating when required.

static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer *buffer, const void *data, int64_t size_bytes)#

Write data to buffer and increment the buffer size.

This function does not check that buffer has the required capacity.

static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer *buffer, const void *data, int64_t size_bytes)#

Write data to buffer and increment the buffer size.

This function writes and ensures that the buffer has the required capacity, possibly by reallocating the buffer. Like ArrowBufferReserve, this will overallocate when reallocation is required.

static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer *buffer, uint8_t value, int64_t size_bytes)#

Write fill to buffer and increment the buffer size.

This function writes the specified number of fill bytes and ensures that the buffer has the required capacity.

static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer *buffer, int8_t value)#

Write an 8-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer *buffer, uint8_t value)#

Write an unsigned 8-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer *buffer, int16_t value)#

Write a 16-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer *buffer, uint16_t value)#

Write an unsigned 16-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer *buffer, int32_t value)#

Write a 32-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer *buffer, uint32_t value)#

Write an unsigned 32-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer *buffer, int64_t value)#

Write a 64-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer *buffer, uint64_t value)#

Write an unsigned 64-bit integer to a buffer.

static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer *buffer, double value)#

Write a double to a buffer.

static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer *buffer, float value)#

Write a float to a buffer.

static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer *buffer, struct ArrowStringView value)#

Write an ArrowStringView to a buffer.

static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer *buffer, struct ArrowBufferView value)#

Write an ArrowBufferView to a buffer.

struct ArrowBufferAllocator#
#include <nanoarrow_types.h>

Array buffer allocation and deallocation.

Container for allocate, reallocate, and free methods that can be used to customize allocation and deallocation of buffers when constructing an ArrowArray.

Public Members

uint8_t *(*reallocate)(struct ArrowBufferAllocator *allocator, uint8_t *ptr, int64_t old_size, int64_t new_size)#

Reallocate a buffer or return NULL if it cannot be reallocated.

void (*free)(struct ArrowBufferAllocator *allocator, uint8_t *ptr, int64_t size)#

Deallocate a buffer allocated by this allocator.

void *private_data#

Opaque data specific to the allocator.

struct ArrowBuffer#
#include <nanoarrow_types.h>

An owning mutable view of a buffer.

Public Members

uint8_t *data#

A pointer to the start of the buffer.

If capacity_bytes is 0, this value may be NULL.

int64_t size_bytes#

The size of the buffer in bytes.

int64_t capacity_bytes#

The capacity of the buffer in bytes.

struct ArrowBufferAllocator allocator#

The allocator that will be used to reallocate and/or free the buffer.

Bitmap utilities#

group nanoarrow-bitmap

Functions

static inline int8_t ArrowBitGet(const uint8_t *bits, int64_t i)#

Extract a boolean value from a bitmap.

static inline void ArrowBitSet(uint8_t *bits, int64_t i)#

Set a boolean value in a bitmap to true.

static inline void ArrowBitClear(uint8_t *bits, int64_t i)#

Set a boolean value in a bitmap to false.

static inline void ArrowBitSetTo(uint8_t *bits, int64_t i, uint8_t value)#

Set a boolean value in a bitmap.

static inline void ArrowBitsSetTo(uint8_t *bits, int64_t start_offset, int64_t length, uint8_t bits_are_set)#

Set a range in a bitmap to a boolean value.

static inline int64_t ArrowBitCountSet(const uint8_t *bits, int64_t i_from, int64_t i_to)#

Count true values in a bitmap.

static inline void ArrowBitsUnpackInt8(const uint8_t *bits, int64_t start_offset, int64_t length, int8_t *out)#

Extract int8 boolean values from a range in a bitmap.

static inline void ArrowBitsUnpackInt32(const uint8_t *bits, int64_t start_offset, int64_t length, int32_t *out)#

Extract int32 boolean values from a range in a bitmap.

static inline void ArrowBitmapInit(struct ArrowBitmap *bitmap)#

Initialize an ArrowBitmap.

Initialize the builder’s buffer, empty its cache, and reset the size to zero.

static inline void ArrowBitmapMove(struct ArrowBitmap *src, struct ArrowBitmap *dst)#

Move an ArrowBitmap.

Transfers the underlying buffer data and lifecycle management to another address and resets src.

static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap *bitmap, int64_t additional_size_bits)#

Ensure a bitmap builder has at least a given additional capacity.

Ensures that the buffer has space to append at least additional_size_bits, overallocating when required.

static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap *bitmap, int64_t new_size_bits, char shrink_to_fit)#

Grow or shrink a bitmap to a given size.

When shrinking the size of the bitmap, the bitmap is only reallocated if shrink_to_fit is non-zero.

static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap *bitmap, uint8_t bits_are_set, int64_t length)#

Reserve space for and append zero or more of the same boolean value to a bitmap.

static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap *bitmap, uint8_t bits_are_set, int64_t length)#

Append zero or more of the same boolean value to a bitmap.

static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap *bitmap, const int8_t *values, int64_t n_values)#

Append boolean values encoded as int8_t to a bitmap.

The values must all be 0 or 1.

static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap *bitmap, const int32_t *values, int64_t n_values)#

Append boolean values encoded as int32_t to a bitmap.

The values must all be 0 or 1.

static inline void ArrowBitmapReset(struct ArrowBitmap *bitmap)#

Reset a bitmap builder.

Releases any memory held by buffer, empties the cache, and resets the size to zero

struct ArrowBitmap#
#include <nanoarrow_types.h>

An owning mutable view of a bitmap.

Public Members

struct ArrowBuffer buffer#

An ArrowBuffer to hold the allocated memory.

int64_t size_bits#

The number of bits that have been appended to the bitmap.

Create, read, and modify schema metadata#

group nanoarrow-metadata

Functions

ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader *reader, const char *metadata)#

Initialize an ArrowMetadataReader.

ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader *reader, struct ArrowStringView *key_out, struct ArrowStringView *value_out)#

Read the next key/value pair from an ArrowMetadataReader.

int64_t ArrowMetadataSizeOf(const char *metadata)#

The number of bytes in a key/value metadata string.

char ArrowMetadataHasKey(const char *metadata, struct ArrowStringView key)#

Check for a key in schema metadata.

ArrowErrorCode ArrowMetadataGetValue(const char *metadata, struct ArrowStringView key, struct ArrowStringView *value_out)#

Extract a value from schema metadata.

If key does not exist in metadata, value_out is unmodified

ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer *buffer, const char *metadata)#

Initialize a builder for schema metadata from key/value pairs.

metadata can be an existing metadata string or NULL to initialize an empty metadata string.

ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer *buffer, struct ArrowStringView key, struct ArrowStringView value)#

Append a key/value pair to a buffer containing serialized metadata.

ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer *buffer, struct ArrowStringView key, struct ArrowStringView value)#

Set a key/value pair to a buffer containing serialized metadata.

Ensures that the only entry for key in the metadata is set to value. This function maintains the existing position of (the first instance of) key if present in the data.

ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer *buffer, struct ArrowStringView key)#

Remove a key from a buffer containing serialized metadata.

struct ArrowMetadataReader#
#include <nanoarrow.h>

Reader for key/value pairs in schema metadata.

The ArrowMetadataReader does not own any data and is only valid for the lifetime of the underlying metadata pointer.

Public Members

const char *metadata#

A metadata string from a schema->metadata field.

int64_t offset#

The current offset into the metadata string.

int32_t remaining_keys#

The number of remaining keys.

Memory management#

group nanoarrow-malloc

Non-buffer members of a struct ArrowSchema and struct ArrowArray must be allocated using ArrowMalloc() or ArrowRealloc() and freed using ArrowFree() for schemas and arrays allocated here. Buffer members are allocated using an ArrowBufferAllocator.

Functions

void *ArrowMalloc(int64_t size)#

Allocate like malloc()

void *ArrowRealloc(void *ptr, int64_t size)#

Reallocate like realloc()

void ArrowFree(void *ptr)#

Free a pointer allocated using ArrowMalloc() or ArrowRealloc().

struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void)#

Return the default allocator.

The default allocator uses ArrowMalloc(), ArrowRealloc(), and ArrowFree().

struct ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback, void *private_data)#

Create a custom deallocator.

Creates a buffer allocator with only a free method that can be used to attach a custom deallocator to an ArrowBuffer. This may be used to avoid copying an existing buffer that was not allocated using the infrastructure provided here (e.g., by an R or Python object).

Error handling#

group nanoarrow-errors

Functions generally return an errno-compatible error code; functions that need to communicate more verbose error information accept a pointer to an ArrowError. This can be stack or statically allocated. The content of the message is undefined unless an error code has been returned. If a nanoarrow function is passed a non-null ArrowError pointer, the ArrowError pointed to by the argument will be populated with a null-terminated error message. It is safe to pass a NULL ArrowError anywhere in the nanoarrow API.

Except where documented, it is generally not safe to continue after a function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms for memory management and error propagation.

Defines

NANOARROW_OK#

Return code for success.

NANOARROW_RETURN_NOT_OK(EXPR)#

Check the result of an expression and return it if not NANOARROW_OK.

NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR)#

Check the result of an expression and return it if not NANOARROW_OK, adding an auto-generated message to an ArrowError.

This macro is used to ensure that functions that accept an ArrowError as input always set its message when returning an error code (e.g., when calling a nanoarrow function that does not accept ArrowError).

Typedefs

typedef int ArrowErrorCode#

Represents an errno-compatible error code.

Functions

NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError *error, const char *fmt, ...)#

Set the contents of an error using printf syntax.

If error is NULL, this function does nothing and returns NANOARROW_OK.

static inline void ArrowErrorInit(struct ArrowError *error)#

Ensure an ArrowError is null-terminated by zeroing the first character.

If error is NULL, this function does nothing.

static inline const char *ArrowErrorMessage(struct ArrowError *error)#

Get the contents of an error.

If error is NULL, returns “”, or returns the contents of the error message otherwise.

static inline void ArrowErrorSetString(struct ArrowError *error, const char *src)#

Set the contents of an error from an existing null-terminated string.

If error is NULL, this function does nothing.

struct ArrowError#
#include <nanoarrow_types.h>

Error type containing a UTF-8 encoded message.

Public Members

char message[1024]#

A character buffer with space for an error message.

Utility data structures#

group nanoarrow-utils

Enums

enum ArrowType#

Arrow type enumerator.

These names are intended to map to the corresponding arrow::Type::type enumerator; however, the numeric values are specifically not equal (i.e., do not rely on numeric comparison).

Values:

enumerator NANOARROW_TYPE_UNINITIALIZED#
enumerator NANOARROW_TYPE_NA#
enumerator NANOARROW_TYPE_BOOL#
enumerator NANOARROW_TYPE_UINT8#
enumerator NANOARROW_TYPE_INT8#
enumerator NANOARROW_TYPE_UINT16#
enumerator NANOARROW_TYPE_INT16#
enumerator NANOARROW_TYPE_UINT32#
enumerator NANOARROW_TYPE_INT32#
enumerator NANOARROW_TYPE_UINT64#
enumerator NANOARROW_TYPE_INT64#
enumerator NANOARROW_TYPE_HALF_FLOAT#
enumerator NANOARROW_TYPE_FLOAT#
enumerator NANOARROW_TYPE_DOUBLE#
enumerator NANOARROW_TYPE_STRING#
enumerator NANOARROW_TYPE_BINARY#
enumerator NANOARROW_TYPE_FIXED_SIZE_BINARY#
enumerator NANOARROW_TYPE_DATE32#
enumerator NANOARROW_TYPE_DATE64#
enumerator NANOARROW_TYPE_TIMESTAMP#
enumerator NANOARROW_TYPE_TIME32#
enumerator NANOARROW_TYPE_TIME64#
enumerator NANOARROW_TYPE_INTERVAL_MONTHS#
enumerator NANOARROW_TYPE_INTERVAL_DAY_TIME#
enumerator NANOARROW_TYPE_DECIMAL128#
enumerator NANOARROW_TYPE_DECIMAL256#
enumerator NANOARROW_TYPE_LIST#
enumerator NANOARROW_TYPE_STRUCT#
enumerator NANOARROW_TYPE_SPARSE_UNION#
enumerator NANOARROW_TYPE_DENSE_UNION#
enumerator NANOARROW_TYPE_DICTIONARY#
enumerator NANOARROW_TYPE_MAP#
enumerator NANOARROW_TYPE_EXTENSION#
enumerator NANOARROW_TYPE_FIXED_SIZE_LIST#
enumerator NANOARROW_TYPE_DURATION#
enumerator NANOARROW_TYPE_LARGE_STRING#
enumerator NANOARROW_TYPE_LARGE_BINARY#
enumerator NANOARROW_TYPE_LARGE_LIST#
enumerator NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO#
enum ArrowTimeUnit#

Arrow time unit enumerator.

These names and values map to the corresponding arrow::TimeUnit::type enumerator.

Values:

enumerator NANOARROW_TIME_UNIT_SECOND#
enumerator NANOARROW_TIME_UNIT_MILLI#
enumerator NANOARROW_TIME_UNIT_MICRO#
enumerator NANOARROW_TIME_UNIT_NANO#

Functions

const char *ArrowNanoarrowVersion(void)#

Return a version string in the form “major.minor.patch”.

int ArrowNanoarrowVersionInt(void)#

Return an integer that can be used to compare versions sequentially.

void ArrowLayoutInit(struct ArrowLayout *layout, enum ArrowType storage_type)#

Initialize a description of buffer arrangements from a storage type.

static inline struct ArrowStringView ArrowCharView(const char *value)#

Create a string view from a null-terminated string.

ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal *decimal, struct ArrowStringView value)#

Sets the integer value of an ArrowDecimal from a string.

ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal *decimal, struct ArrowBuffer *buffer)#

Get the integer value of an ArrowDecimal as a string.

static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t *offsets, int64_t lo, int64_t hi)#

Resolve a chunk index from increasing int64_t offsets.

Given a buffer of increasing int64_t offsets that begin with 0 (e.g., offset buffer of a large type, run ends of a chunked array implementation), resolve a value v where lo <= v < hi such that offsets[v] <= index < offsets[v + 1].

static inline const char *ArrowTypeString(enum ArrowType type)#

Get a string value of an enum ArrowType value.

Returns NULL for invalid values for type

static inline const char *ArrowTimeUnitString(enum ArrowTimeUnit time_unit)#

Get a string value of an enum ArrowTimeUnit value.

Returns NULL for invalid values for time_unit

static inline void ArrowIntervalInit(struct ArrowInterval *interval, enum ArrowType type)#

Zero-initialize an ArrowInterval with a given interval type.

static inline void ArrowDecimalInit(struct ArrowDecimal *decimal, int32_t bitwidth, int32_t precision, int32_t scale)#

Initialize a decimal with a given set of type parameters.

static inline void ArrowDecimalGetBytes(const struct ArrowDecimal *decimal, uint8_t *out)#

Copy the bytes of this decimal into a sufficiently large buffer.

static inline int64_t ArrowDecimalSign(const struct ArrowDecimal *decimal)#

Returns 1 if the value represented by decimal is >= 0 or -1 otherwise.

static inline void ArrowDecimalSetInt(struct ArrowDecimal *decimal, int64_t value)#

Sets the integer value of this decimal.

static inline void ArrowDecimalNegate(struct ArrowDecimal *decimal)#

Negate the value of this decimal in place.

static inline void ArrowDecimalSetBytes(struct ArrowDecimal *decimal, const uint8_t *value)#

Copy bytes from a buffer into this decimal.

struct ArrowStringView#
#include <nanoarrow_types.h>

A non-owning view of a string.

Public Members

const char *data#

A pointer to the start of the string.

If size_bytes is 0, this value may be NULL.

int64_t size_bytes#

The size of the string in bytes.

(Not including the null terminator.)

struct ArrowBufferView#
#include <nanoarrow_types.h>

A non-owning view of a buffer.

Public Members

union ArrowBufferViewData data#

A pointer to the start of the buffer.

If size_bytes is 0, this value may be NULL.

int64_t size_bytes#

The size of the buffer in bytes.

struct ArrowLayout#
#include <nanoarrow_types.h>

A description of an arrangement of buffers.

Contains the minimum amount of information required to calculate the size of each buffer in an ArrowArray knowing only the length and offset of the array.

Public Members

enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS]#

The function of each buffer.

enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS]#

The data type of each buffer.

int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS]#

The size of an element in each buffer, or 0 if this size is variable or unknown.

int64_t child_size_elements#

The number of elements in the child array per element in this array for a fixed-size list.

struct ArrowInterval#
#include <nanoarrow_types.h>

A representation of an interval.

Public Members

enum ArrowType type#

The type of interval being used.

int32_t months#

The number of months represented by the interval.

int32_t days#

The number of days represented by the interval.

int32_t ms#

The number of ms represented by the interval.

int64_t ns#

The number of ns represented by the interval.

struct ArrowDecimal#
#include <nanoarrow_types.h>

A representation of a fixed-precision decimal number.

This structure should be initialized with ArrowDecimalInit() once and values set using ArrowDecimalSetInt(), ArrowDecimalSetDigits(), or ArrowDecimalSetBytes().

Public Members

uint64_t words[4]#

An array of 64-bit integers of n_words length defined in native-endian order.

int32_t precision#

The number of significant digits this decimal number can represent.

int32_t scale#

The number of digits after the decimal point. This can be negative.

int n_words#

The number of words in the words array.

int high_word_index#

Cached value used by the implementation.

int low_word_index#

Cached value used by the implementation.

Arrow C Data Interface#

group nanoarrow-arrow-cdata

The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html) and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html) interfaces are part of the Arrow Columnar Format specification (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for documentation of these structures.

Defines

ARROW_C_DATA_INTERFACE#
ARROW_FLAG_DICTIONARY_ORDERED#
ARROW_FLAG_NULLABLE#
ARROW_FLAG_MAP_KEYS_SORTED#
ARROW_C_STREAM_INTERFACE#

Functions

static inline void ArrowSchemaMove(struct ArrowSchema *src, struct ArrowSchema *dst)#

Move the contents of an src ArrowSchema into dst and set src->release to NULL.

static inline void ArrowSchemaRelease(struct ArrowSchema *schema)#

Call the release callback of an ArrowSchema.

static inline void ArrowArrayMove(struct ArrowArray *src, struct ArrowArray *dst)#

Move the contents of an src ArrowArray into dst and set src->release to NULL.

static inline void ArrowArrayStreamMove(struct ArrowArrayStream *src, struct ArrowArrayStream *dst)#

Move the contents of an src ArrowArrayStream into dst and set src->release to NULL.

static inline ArrowErrorCode ArrowArrayStreamGetSchema(struct ArrowArrayStream *array_stream, struct ArrowSchema *out, struct ArrowError *error)#

Call the get_schema callback of an ArrowArrayStream.

Unlike the get_schema callback, this wrapper checks the return code and propagates the error reported by get_last_error into error. This makes it significantly less verbose to iterate over array streams using NANOARROW_RETURN_NOT_OK()-style error handling.

static inline ArrowErrorCode ArrowArrayStreamGetNext(struct ArrowArrayStream *array_stream, struct ArrowArray *out, struct ArrowError *error)#

Call the get_next callback of an ArrowArrayStream.

Unlike the get_next callback, this wrapper checks the return code and propagates the error reported by get_last_error into error. This makes it significantly less verbose to iterate over array streams using NANOARROW_RETURN_NOT_OK()-style error handling.

static inline const char *ArrowArrayStreamGetLastError(struct ArrowArrayStream *array_stream)#

Call the get_last_error callback of an ArrowArrayStream.

Unlike the get_last_error callback, this function never returns NULL (i.e., its result is safe to use in printf-style error formatters). Null values from the original callback are reported as “<get_last_error() returned NULL>”.

struct ArrowSchema#

Public Members

const char *format#
const char *name#
const char *metadata#
int64_t flags#
int64_t n_children#
struct ArrowSchema **children#
struct ArrowSchema *dictionary#
void (*release)(struct ArrowSchema*)#
void *private_data#
struct ArrowArray#

Public Members

int64_t length#
int64_t null_count#
int64_t offset#
int64_t n_buffers#
int64_t n_children#
const void **buffers#
struct ArrowArray **children#
struct ArrowArray *dictionary#
void (*release)(struct ArrowArray*)#
void *private_data#
struct ArrowArrayStream#

Public Members

int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema *out)#
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray *out)#
const char *(*get_last_error)(struct ArrowArrayStream*)#
void (*release)(struct ArrowArrayStream*)#
void *private_data#