Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
Namespaces | Classes | Functions | Variables
arrow::py Namespace Reference

Namespaces

 benchmark
 

Classes

class  Ndarray1DIndexer
 Indexing convenience for interacting with strided 1-dim ndarray objects. More...
 
class  NumPyBuffer
 
class  OwnedRef
 
class  OwnedRefNoGIL
 
struct  PandasOptions
 
class  PyAcquireGIL
 
class  PyBuffer
 
struct  PyBytesView
 
struct  PyConversionOptions
 
class  PyForeignBuffer
 
class  PyOutputStream
 
class  PyReadableFile
 
struct  SerializedPyObject
 

Functions

Status ConvertArrayToPandas (PandasOptions options, const std::shared_ptr< Array > &arr, PyObject *py_ref, PyObject **out)
 
Status ConvertChunkedArrayToPandas (PandasOptions options, const std::shared_ptr< ChunkedArray > &col, PyObject *py_ref, PyObject **out)
 
Status ConvertColumnToPandas (PandasOptions options, const std::shared_ptr< Column > &col, PyObject *py_ref, PyObject **out)
 
Status ConvertTableToPandas (PandasOptions options, const std::shared_ptr< Table > &table, MemoryPool *pool, PyObject **out)
 
Status ConvertTableToPandas (PandasOptions options, const std::unordered_set< std::string > &categorical_columns, const std::shared_ptr< Table > &table, MemoryPool *pool, PyObject **out)
 Convert a whole table as efficiently as possible to a pandas.DataFrame. More...
 
Status ConvertPyError (StatusCode code=StatusCode::UnknownError)
 
Status CheckPyError (StatusCode code=StatusCode::UnknownError)
 
Status PassPyError ()
 
void set_default_memory_pool (MemoryPool *pool)
 
MemoryPool * get_memory_pool ()
 
void set_numpy_nan (PyObject *obj)
 
Status ReadSerializedObject (io::RandomAccessFile *src, SerializedPyObject *out)
 Read serialized Python sequence from file interface using Arrow IPC. More...
 
Status GetSerializedFromComponents (int num_tensors, int num_buffers, PyObject *data, SerializedPyObject *out)
 Reconstruct SerializedPyObject from representation produced by SerializedPyObject::GetComponents. More...
 
Status DeserializeObject (PyObject *context, const SerializedPyObject &object, PyObject *base, PyObject **out)
 Reconstruct Python object from Arrow-serialized representation. More...
 
Status DeserializeTensor (const SerializedPyObject &object, std::shared_ptr< Tensor > *out)
 Reconstruct Tensor from Arrow-serialized representation. More...
 
Status ReadTensor (std::shared_ptr< Buffer > src, std::shared_ptr< Tensor > *out)
 
std::shared_ptr< DataType > GetPrimitiveType (Type::type type)
 
PyObject * PyHalf_FromHalf (npy_half value)
 
Status PyFloat_AsHalf (PyObject *obj, npy_half *out)
 
arrow::Status InferArrowType (PyObject *obj, std::shared_ptr< arrow::DataType > *out_type)
 
arrow::Status InferArrowTypeAndSize (PyObject *obj, int64_t *size, std::shared_ptr< arrow::DataType > *out_type)
 
bool IsPyBool (PyObject *obj)
 Checks whether the passed Python object is a boolean scalar. More...
 
bool IsPyInt (PyObject *obj)
 Checks whether the passed Python object is an integer scalar. More...
 
bool IsPyFloat (PyObject *obj)
 Checks whether the passed Python object is a float scalar. More...
 
template<typename VISITOR >
Status VisitNumpyArrayInline (PyArrayObject *arr, VISITOR *visitor)
 
int cast_npy_type_compat (int type_num)
 
bool is_contiguous (PyObject *array)
 
Status NumPyDtypeToArrow (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status NumPyDtypeToArrow (PyArray_Descr *descr, std::shared_ptr< DataType > *out)
 
Status GetTensorType (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status GetNumPyType (const DataType &type, int *type_num)
 
Status NdarrayToTensor (MemoryPool *pool, PyObject *ao, std::shared_ptr< Tensor > *out)
 
Status TensorToNdarray (const std::shared_ptr< Tensor > &tensor, PyObject *base, PyObject **out)
 
int import_numpy ()
 
int fix_numpy_type_num (int type_num)
 
Status NdarrayToArrow (MemoryPool *pool, PyObject *ao, PyObject *mo, bool from_pandas, const std::shared_ptr< DataType > &type, const compute::CastOptions &cast_options, std::shared_ptr< ChunkedArray > *out)
 Convert NumPy arrays to Arrow. More...
 
Status NdarrayToArrow (MemoryPool *pool, PyObject *ao, PyObject *mo, bool from_pandas, const std::shared_ptr< DataType > &type, std::shared_ptr< ChunkedArray > *out)
 Safely convert NumPy arrays to Arrow. More...
 
int import_pyarrow ()
 
bool is_buffer (PyObject *buffer)
 
Status unwrap_buffer (PyObject *buffer, std::shared_ptr< Buffer > *out)
 
PyObject * wrap_buffer (const std::shared_ptr< Buffer > &buffer)
 
bool is_data_type (PyObject *data_type)
 
Status unwrap_data_type (PyObject *data_type, std::shared_ptr< DataType > *out)
 
PyObject * wrap_data_type (const std::shared_ptr< DataType > &type)
 
bool is_field (PyObject *field)
 
Status unwrap_field (PyObject *field, std::shared_ptr< Field > *out)
 
PyObject * wrap_field (const std::shared_ptr< Field > &field)
 
bool is_schema (PyObject *schema)
 
Status unwrap_schema (PyObject *schema, std::shared_ptr< Schema > *out)
 
PyObject * wrap_schema (const std::shared_ptr< Schema > &schema)
 
bool is_array (PyObject *array)
 
Status unwrap_array (PyObject *array, std::shared_ptr< Array > *out)
 
PyObject * wrap_array (const std::shared_ptr< Array > &array)
 
bool is_tensor (PyObject *tensor)
 
Status unwrap_tensor (PyObject *tensor, std::shared_ptr< Tensor > *out)
 
PyObject * wrap_tensor (const std::shared_ptr< Tensor > &tensor)
 
bool is_column (PyObject *column)
 
Status unwrap_column (PyObject *column, std::shared_ptr< Column > *out)
 
PyObject * wrap_column (const std::shared_ptr< Column > &column)
 
bool is_table (PyObject *table)
 
Status unwrap_table (PyObject *table, std::shared_ptr< Table > *out)
 
PyObject * wrap_table (const std::shared_ptr< Table > &table)
 
bool is_record_batch (PyObject *batch)
 
Status unwrap_record_batch (PyObject *batch, std::shared_ptr< RecordBatch > *out)
 
PyObject * wrap_record_batch (const std::shared_ptr< RecordBatch > &batch)
 
Status ConvertPySequence (PyObject *obj, PyObject *mask, const PyConversionOptions &options, std::shared_ptr< ChunkedArray > *out)
 Convert sequence (list, generator, NumPy array with dtype object) of Python objects. More...
 
Status ConvertPySequence (PyObject *obj, const PyConversionOptions &options, std::shared_ptr< ChunkedArray > *out)
 
Status SerializeObject (PyObject *context, PyObject *sequence, SerializedPyObject *out)
 Serialize Python sequence as a SerializedPyObject. More...
 
Status SerializeTensor (std::shared_ptr< Tensor > tensor, py::SerializedPyObject *out)
 Serialize an Arrow Tensor as a SerializedPyObject. More...
 
Status WriteTensorHeader (std::shared_ptr< DataType > dtype, const std::vector< int64_t > &shape, int64_t tensor_num_bytes, io::OutputStream *dst)
 Write the Tensor metadata header to an OutputStream. More...
 

Variables

PyObject * numpy_nan
 
class ARROW_NO_EXPORT PythonFile
 

Function Documentation

◆ cast_npy_type_compat()

int arrow::py::cast_npy_type_compat ( int  type_num)

◆ CheckPyError()

Status arrow::py::CheckPyError ( StatusCode  code = StatusCode::UnknownError)
inline

◆ ConvertArrayToPandas()

Status arrow::py::ConvertArrayToPandas ( PandasOptions  options,
const std::shared_ptr< Array > &  arr,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertChunkedArrayToPandas()

Status arrow::py::ConvertChunkedArrayToPandas ( PandasOptions  options,
const std::shared_ptr< ChunkedArray > &  col,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertColumnToPandas()

Status arrow::py::ConvertColumnToPandas ( PandasOptions  options,
const std::shared_ptr< Column > &  col,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertPyError()

Status arrow::py::ConvertPyError ( StatusCode  code = StatusCode::UnknownError)

◆ ConvertPySequence() [1/2]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
PyObject *  mask,
const PyConversionOptions &  options,
std::shared_ptr< ChunkedArray > *  out 
)

Convert sequence (list, generator, NumPy array with dtype object) of Python objects.

Parameters
[in] obj: the sequence to convert
[in] mask: a NumPy array of true/false values to indicate whether values in the sequence are null (true) or not null (false). This parameter may be null.
[in] options: various conversion options
[out] out: a ChunkedArray containing one or more chunks
Returns
Status

◆ ConvertPySequence() [2/2]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
const PyConversionOptions &  options,
std::shared_ptr< ChunkedArray > *  out 
)

◆ ConvertTableToPandas() [1/2]

Status arrow::py::ConvertTableToPandas ( PandasOptions  options,
const std::shared_ptr< Table > &  table,
MemoryPool *  pool,
PyObject **  out 
)

◆ ConvertTableToPandas() [2/2]

Status arrow::py::ConvertTableToPandas ( PandasOptions  options,
const std::unordered_set< std::string > &  categorical_columns,
const std::shared_ptr< Table > &  table,
MemoryPool *  pool,
PyObject **  out 
)

Convert a whole table as efficiently as possible to a pandas.DataFrame.

Explicitly name columns that should be categorical. This option is only used on conversions that are applied to a table.

◆ DeserializeObject()

Status arrow::py::DeserializeObject ( PyObject *  context,
const SerializedPyObject &  object,
PyObject *  base,
PyObject **  out 
)

Reconstruct Python object from Arrow-serialized representation.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] object: Object to deserialize
[in] base: a Python object holding the underlying data that any NumPy arrays will reference, to avoid premature deallocation
[out] out: The returned object
Returns
Status. This function acquires the GIL.

◆ DeserializeTensor()

Status arrow::py::DeserializeTensor ( const SerializedPyObject &  object,
std::shared_ptr< Tensor > *  out 
)

Reconstruct Tensor from Arrow-serialized representation.

Parameters
[in] object: Object to deserialize
[out] out: The deserialized tensor
Returns
Status

◆ fix_numpy_type_num()

int arrow::py::fix_numpy_type_num ( int  type_num)
inline

◆ get_memory_pool()

MemoryPool* arrow::py::get_memory_pool ( )

◆ GetNumPyType()

Status arrow::py::GetNumPyType ( const DataType &  type,
int *  type_num 
)

◆ GetPrimitiveType()

std::shared_ptr<DataType> arrow::py::GetPrimitiveType ( Type::type  type)

◆ GetSerializedFromComponents()

Status arrow::py::GetSerializedFromComponents ( int  num_tensors,
int  num_buffers,
PyObject *  data,
SerializedPyObject *  out 
)

Reconstruct SerializedPyObject from representation produced by SerializedPyObject::GetComponents.

Parameters
[in] num_tensors: number of tensors in the object
[in] num_buffers: number of buffers in the object
[in] data: a list containing pyarrow.Buffer instances. Must be 1 + num_tensors * 2 + num_buffers in length
[out] out: the reconstructed object
Returns
Status

◆ GetTensorType()

Status arrow::py::GetTensorType ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ import_numpy()

int arrow::py::import_numpy ( )
inline

◆ import_pyarrow()

int arrow::py::import_pyarrow ( )

◆ InferArrowType()

arrow::Status arrow::py::InferArrowType ( PyObject *  obj,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ InferArrowTypeAndSize()

arrow::Status arrow::py::InferArrowTypeAndSize ( PyObject *  obj,
int64_t *  size,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ is_array()

bool arrow::py::is_array ( PyObject *  array)

◆ is_buffer()

bool arrow::py::is_buffer ( PyObject *  buffer)

◆ is_column()

bool arrow::py::is_column ( PyObject *  column)

◆ is_contiguous()

bool arrow::py::is_contiguous ( PyObject *  array)

◆ is_data_type()

bool arrow::py::is_data_type ( PyObject *  data_type)

◆ is_field()

bool arrow::py::is_field ( PyObject *  field)

◆ is_record_batch()

bool arrow::py::is_record_batch ( PyObject *  batch)

◆ is_schema()

bool arrow::py::is_schema ( PyObject *  schema)

◆ is_table()

bool arrow::py::is_table ( PyObject *  table)

◆ is_tensor()

bool arrow::py::is_tensor ( PyObject *  tensor)

◆ IsPyBool()

bool arrow::py::IsPyBool ( PyObject *  obj)

Checks whether the passed Python object is a boolean scalar.

◆ IsPyFloat()

bool arrow::py::IsPyFloat ( PyObject *  obj)

Checks whether the passed Python object is a float scalar.

◆ IsPyInt()

bool arrow::py::IsPyInt ( PyObject *  obj)

Checks whether the passed Python object is an integer scalar.

◆ NdarrayToArrow() [1/2]

Status arrow::py::NdarrayToArrow ( MemoryPool *  pool,
PyObject *  ao,
PyObject *  mo,
bool  from_pandas,
const std::shared_ptr< DataType > &  type,
const compute::CastOptions &  cast_options,
std::shared_ptr< ChunkedArray > *  out 
)

Convert NumPy arrays to Arrow.

If the target data type is not known, pass null for the type argument.

Parameters
[in] pool: Memory pool for any memory allocations
[in] ao: an ndarray with the array data
[in] mo: an ndarray with a null mask (True is null), optional
[in] from_pandas: If true, use pandas's null sentinels to determine whether values are null
[in] type: a specific type to cast to, may be null
[in] cast_options: casting options
[out] out: a ChunkedArray, to accommodate chunked output

◆ NdarrayToArrow() [2/2]

Status arrow::py::NdarrayToArrow ( MemoryPool *  pool,
PyObject *  ao,
PyObject *  mo,
bool  from_pandas,
const std::shared_ptr< DataType > &  type,
std::shared_ptr< ChunkedArray > *  out 
)

Safely convert NumPy arrays to Arrow.

If the target data type is not known, pass null for the type argument.

Parameters
[in] pool: Memory pool for any memory allocations
[in] ao: an ndarray with the array data
[in] mo: an ndarray with a null mask (True is null), optional
[in] from_pandas: If true, use pandas's null sentinels to determine whether values are null
[in] type: a specific type to cast to, may be null
[out] out: a ChunkedArray, to accommodate chunked output

◆ NdarrayToTensor()

Status arrow::py::NdarrayToTensor ( MemoryPool *  pool,
PyObject *  ao,
std::shared_ptr< Tensor > *  out 
)

◆ NumPyDtypeToArrow() [1/2]

Status arrow::py::NumPyDtypeToArrow ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ NumPyDtypeToArrow() [2/2]

Status arrow::py::NumPyDtypeToArrow ( PyArray_Descr *  descr,
std::shared_ptr< DataType > *  out 
)

◆ PassPyError()

Status arrow::py::PassPyError ( )

◆ PyFloat_AsHalf()

Status arrow::py::PyFloat_AsHalf ( PyObject *  obj,
npy_half *  out 
)

◆ PyHalf_FromHalf()

PyObject* arrow::py::PyHalf_FromHalf ( npy_half  value)

◆ ReadSerializedObject()

Status arrow::py::ReadSerializedObject ( io::RandomAccessFile *  src,
SerializedPyObject *  out 
)

Read serialized Python sequence from file interface using Arrow IPC.

Parameters
[in] src: a RandomAccessFile
[out] out: the reconstructed data
Returns
Status

◆ ReadTensor()

Status arrow::py::ReadTensor ( std::shared_ptr< Buffer >  src,
std::shared_ptr< Tensor > *  out 
)

◆ SerializeObject()

Status arrow::py::SerializeObject ( PyObject *  context,
PyObject *  sequence,
SerializedPyObject *  out 
)

Serialize Python sequence as a SerializedPyObject.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] sequence: A Python sequence object to serialize to Arrow data structures
[out] out: The serialized representation
Returns
Status

Release GIL before calling

◆ SerializeTensor()

Status arrow::py::SerializeTensor ( std::shared_ptr< Tensor >  tensor,
py::SerializedPyObject *  out 
)

Serialize an Arrow Tensor as a SerializedPyObject.

Parameters
[in] tensor: Tensor to be serialized
[out] out: The serialized representation
Returns
Status

◆ set_default_memory_pool()

void arrow::py::set_default_memory_pool ( MemoryPool *  pool)

◆ set_numpy_nan()

void arrow::py::set_numpy_nan ( PyObject *  obj)

◆ TensorToNdarray()

Status arrow::py::TensorToNdarray ( const std::shared_ptr< Tensor > &  tensor,
PyObject *  base,
PyObject **  out 
)

◆ unwrap_array()

Status arrow::py::unwrap_array ( PyObject *  array,
std::shared_ptr< Array > *  out 
)

◆ unwrap_buffer()

Status arrow::py::unwrap_buffer ( PyObject *  buffer,
std::shared_ptr< Buffer > *  out 
)

◆ unwrap_column()

Status arrow::py::unwrap_column ( PyObject *  column,
std::shared_ptr< Column > *  out 
)

◆ unwrap_data_type()

Status arrow::py::unwrap_data_type ( PyObject *  data_type,
std::shared_ptr< DataType > *  out 
)

◆ unwrap_field()

Status arrow::py::unwrap_field ( PyObject *  field,
std::shared_ptr< Field > *  out 
)

◆ unwrap_record_batch()

Status arrow::py::unwrap_record_batch ( PyObject *  batch,
std::shared_ptr< RecordBatch > *  out 
)

◆ unwrap_schema()

Status arrow::py::unwrap_schema ( PyObject *  schema,
std::shared_ptr< Schema > *  out 
)

◆ unwrap_table()

Status arrow::py::unwrap_table ( PyObject *  table,
std::shared_ptr< Table > *  out 
)

◆ unwrap_tensor()

Status arrow::py::unwrap_tensor ( PyObject *  tensor,
std::shared_ptr< Tensor > *  out 
)

◆ VisitNumpyArrayInline()

template<typename VISITOR >
Status arrow::py::VisitNumpyArrayInline ( PyArrayObject *  arr,
VISITOR *  visitor 
)
inline

◆ wrap_array()

PyObject* arrow::py::wrap_array ( const std::shared_ptr< Array > &  array)

◆ wrap_buffer()

PyObject* arrow::py::wrap_buffer ( const std::shared_ptr< Buffer > &  buffer)

◆ wrap_column()

PyObject* arrow::py::wrap_column ( const std::shared_ptr< Column > &  column)

◆ wrap_data_type()

PyObject* arrow::py::wrap_data_type ( const std::shared_ptr< DataType > &  type)

◆ wrap_field()

PyObject* arrow::py::wrap_field ( const std::shared_ptr< Field > &  field)

◆ wrap_record_batch()

PyObject* arrow::py::wrap_record_batch ( const std::shared_ptr< RecordBatch > &  batch)

◆ wrap_schema()

PyObject* arrow::py::wrap_schema ( const std::shared_ptr< Schema > &  schema)

◆ wrap_table()

PyObject* arrow::py::wrap_table ( const std::shared_ptr< Table > &  table)

◆ wrap_tensor()

PyObject* arrow::py::wrap_tensor ( const std::shared_ptr< Tensor > &  tensor)

◆ WriteTensorHeader()

Status arrow::py::WriteTensorHeader ( std::shared_ptr< DataType >  dtype,
const std::vector< int64_t > &  shape,
int64_t  tensor_num_bytes,
io::OutputStream *  dst 
)

Write the Tensor metadata header to an OutputStream.

Parameters
[in] dtype: DataType of the Tensor
[in] shape: The shape of the tensor
[in] tensor_num_bytes: The length of the Tensor data in bytes
[in] dst: The OutputStream to write the Tensor header to
Returns
Status

Variable Documentation

◆ numpy_nan

PyObject* arrow::py::numpy_nan

◆ PythonFile

class ARROW_NO_EXPORT arrow::py::PythonFile