Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
Classes | Functions | Variables
arrow::py Namespace Reference

Classes

class  Ndarray1DIndexer
 Indexing convenience for interacting with strided 1-dim ndarray objects. More...
 
class  NumPyBuffer
 
class  OwnedRef
 
struct  PandasOptions
 
class  PyAcquireGIL
 
class  PyBuffer
 
class  PyBytesReader
 
struct  PyObjectStringify
 
class  PyOutputStream
 
class  PyReadableFile
 
class  ScopedRef
 
struct  SerializedPyObject
 

Functions

Status ConvertArrayToPandas (PandasOptions options, const std::shared_ptr< Array > &arr, PyObject *py_ref, PyObject **out)
 
Status ConvertColumnToPandas (PandasOptions options, const std::shared_ptr< Column > &col, PyObject *py_ref, PyObject **out)
 
Status ConvertTableToPandas (PandasOptions options, const std::shared_ptr< Table > &table, int nthreads, MemoryPool *pool, PyObject **out)
 
Status ReadSerializedObject (io::RandomAccessFile *src, SerializedPyObject *out)
 Read serialized Python sequence from file interface using Arrow IPC. More...
 
Status DeserializeObject (PyObject *context, const SerializedPyObject &object, PyObject *base, PyObject **out)
 Reconstruct Python object from Arrow-serialized representation. More...
 
arrow::Status InferArrowType (PyObject *obj, std::shared_ptr< arrow::DataType > *out_type)
 
arrow::Status InferArrowTypeAndSize (PyObject *obj, int64_t *size, std::shared_ptr< arrow::DataType > *out_type)
 
arrow::Status InferArrowSize (PyObject *obj, int64_t *size)
 
arrow::Status AppendPySequence (PyObject *obj, int64_t size, const std::shared_ptr< arrow::DataType > &type, arrow::ArrayBuilder *builder)
 
Status ConvertPySequence (PyObject *obj, MemoryPool *pool, std::shared_ptr< Array > *out)
 
Status ConvertPySequence (PyObject *obj, MemoryPool *pool, std::shared_ptr< Array > *out, const std::shared_ptr< DataType > &type)
 
Status ConvertPySequence (PyObject *obj, MemoryPool *pool, std::shared_ptr< Array > *out, const std::shared_ptr< DataType > &type, int64_t size)
 
Status InvalidConversion (PyObject *obj, const std::string &expected_type_name, std::ostream *out)
 
Status CheckPythonBytesAreFixedLength (PyObject *obj, Py_ssize_t expected_length)
 
Status CheckPyError (StatusCode code=StatusCode::UnknownError)
 
Status PassPyError ()
 
void set_default_memory_pool (MemoryPool *pool)
 
MemoryPool * get_memory_pool ()
 
void set_numpy_nan (PyObject *obj)
 
std::shared_ptr< DataType > GetPrimitiveType (Type::type type)
 
int cast_npy_type_compat (int type_num)
 
bool is_contiguous (PyObject *array)
 
Status NumPyDtypeToArrow (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status GetTensorType (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status GetNumPyType (const DataType &type, int *type_num)
 
Status NdarrayToTensor (MemoryPool *pool, PyObject *ao, std::shared_ptr< Tensor > *out)
 
Status TensorToNdarray (const Tensor &tensor, PyObject *base, PyObject **out)
 
int import_numpy ()
 
Status NdarrayToArrow (MemoryPool *pool, PyObject *ao, PyObject *mo, bool use_pandas_null_sentinels, const std::shared_ptr< DataType > &type, std::shared_ptr< ChunkedArray > *out)
 Convert NumPy arrays to Arrow. More...
 
int import_pyarrow ()
 
bool is_buffer (PyObject *buffer)
 
Status unwrap_buffer (PyObject *buffer, std::shared_ptr< Buffer > *out)
 
PyObject * wrap_buffer (const std::shared_ptr< Buffer > &buffer)
 
bool is_data_type (PyObject *data_type)
 
Status unwrap_data_type (PyObject *data_type, std::shared_ptr< DataType > *out)
 
PyObject * wrap_data_type (const std::shared_ptr< DataType > &type)
 
bool is_field (PyObject *field)
 
Status unwrap_field (PyObject *field, std::shared_ptr< Field > *out)
 
PyObject * wrap_field (const std::shared_ptr< Field > &field)
 
bool is_schema (PyObject *schema)
 
Status unwrap_schema (PyObject *schema, std::shared_ptr< Schema > *out)
 
PyObject * wrap_schema (const std::shared_ptr< Schema > &schema)
 
bool is_array (PyObject *array)
 
Status unwrap_array (PyObject *array, std::shared_ptr< Array > *out)
 
PyObject * wrap_array (const std::shared_ptr< Array > &array)
 
bool is_tensor (PyObject *tensor)
 
Status unwrap_tensor (PyObject *tensor, std::shared_ptr< Tensor > *out)
 
PyObject * wrap_tensor (const std::shared_ptr< Tensor > &tensor)
 
bool is_column (PyObject *column)
 
Status unwrap_column (PyObject *column, std::shared_ptr< Column > *out)
 
PyObject * wrap_column (const std::shared_ptr< Column > &column)
 
bool is_table (PyObject *table)
 
Status unwrap_table (PyObject *table, std::shared_ptr< Table > *out)
 
PyObject * wrap_table (const std::shared_ptr< Table > &table)
 
bool is_record_batch (PyObject *batch)
 
Status unwrap_record_batch (PyObject *batch, std::shared_ptr< RecordBatch > *out)
 
PyObject * wrap_record_batch (const std::shared_ptr< RecordBatch > &batch)
 
Status SerializeObject (PyObject *context, PyObject *sequence, SerializedPyObject *out)
 Serialize Python sequence as a RecordBatch plus. More...
 
Status WriteSerializedObject (const SerializedPyObject &object, io::OutputStream *dst)
 Write serialized Python object to OutputStream. More...
 

Variables

PyObject * numpy_nan
 
class ARROW_NO_EXPORT PythonFile
 

Function Documentation

◆ AppendPySequence()

arrow::Status arrow::py::AppendPySequence ( PyObject *  obj,
int64_t  size,
const std::shared_ptr< arrow::DataType > &  type,
arrow::ArrayBuilder *  builder 
)

◆ cast_npy_type_compat()

int arrow::py::cast_npy_type_compat ( int  type_num)

◆ CheckPyError()

Status arrow::py::CheckPyError ( StatusCode  code = StatusCode::UnknownError)

◆ CheckPythonBytesAreFixedLength()

Status arrow::py::CheckPythonBytesAreFixedLength ( PyObject *  obj,
Py_ssize_t  expected_length 
)

◆ ConvertArrayToPandas()

Status arrow::py::ConvertArrayToPandas ( PandasOptions  options,
const std::shared_ptr< Array > &  arr,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertColumnToPandas()

Status arrow::py::ConvertColumnToPandas ( PandasOptions  options,
const std::shared_ptr< Column > &  col,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertPySequence() [1/3]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
MemoryPool *  pool,
std::shared_ptr< Array > *  out 
)

◆ ConvertPySequence() [2/3]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
MemoryPool *  pool,
std::shared_ptr< Array > *  out,
const std::shared_ptr< DataType > &  type 
)

◆ ConvertPySequence() [3/3]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
MemoryPool *  pool,
std::shared_ptr< Array > *  out,
const std::shared_ptr< DataType > &  type,
int64_t  size 
)

◆ ConvertTableToPandas()

Status arrow::py::ConvertTableToPandas ( PandasOptions  options,
const std::shared_ptr< Table > &  table,
int  nthreads,
MemoryPool *  pool,
PyObject **  out 
)

◆ DeserializeObject()

Status arrow::py::DeserializeObject ( PyObject *  context,
const SerializedPyObject &  object,
PyObject *  base,
PyObject **  out 
)

Reconstruct Python object from Arrow-serialized representation.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] object: object to deserialize
[in] base: a Python object holding the underlying data that any NumPy arrays will reference, to avoid premature deallocation
[out] out: the returned object
Returns
Status. This acquires the GIL.

◆ get_memory_pool()

MemoryPool* arrow::py::get_memory_pool ( )

◆ GetNumPyType()

Status arrow::py::GetNumPyType ( const DataType type,
int *  type_num 
)

◆ GetPrimitiveType()

std::shared_ptr<DataType> arrow::py::GetPrimitiveType ( Type::type  type)

◆ GetTensorType()

Status arrow::py::GetTensorType ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ import_numpy()

int arrow::py::import_numpy ( )
inline

◆ import_pyarrow()

int arrow::py::import_pyarrow ( )

◆ InferArrowSize()

arrow::Status arrow::py::InferArrowSize ( PyObject *  obj,
int64_t *  size 
)

◆ InferArrowType()

arrow::Status arrow::py::InferArrowType ( PyObject *  obj,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ InferArrowTypeAndSize()

arrow::Status arrow::py::InferArrowTypeAndSize ( PyObject *  obj,
int64_t *  size,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ InvalidConversion()

Status arrow::py::InvalidConversion ( PyObject *  obj,
const std::string &  expected_type_name,
std::ostream *  out 
)

◆ is_array()

bool arrow::py::is_array ( PyObject *  array)

◆ is_buffer()

bool arrow::py::is_buffer ( PyObject *  buffer)

◆ is_column()

bool arrow::py::is_column ( PyObject *  column)

◆ is_contiguous()

bool arrow::py::is_contiguous ( PyObject *  array)

◆ is_data_type()

bool arrow::py::is_data_type ( PyObject *  data_type)

◆ is_field()

bool arrow::py::is_field ( PyObject *  field)

◆ is_record_batch()

bool arrow::py::is_record_batch ( PyObject *  batch)

◆ is_schema()

bool arrow::py::is_schema ( PyObject *  schema)

◆ is_table()

bool arrow::py::is_table ( PyObject *  table)

◆ is_tensor()

bool arrow::py::is_tensor ( PyObject *  tensor)

◆ NdarrayToArrow()

Status arrow::py::NdarrayToArrow ( MemoryPool *  pool,
PyObject *  ao,
PyObject *  mo,
bool  use_pandas_null_sentinels,
const std::shared_ptr< DataType > &  type,
std::shared_ptr< ChunkedArray > *  out 
)

Convert NumPy arrays to Arrow.

If the target data type is not known, pass a null type.

Parameters
[in] pool: Memory pool for any memory allocations
[in] ao: an ndarray with the array data
[in] mo: an ndarray with a null mask (True is null), optional
[in] type: a specific type to cast to, may be null
[out] out: a ChunkedArray, to accommodate chunked output

◆ NdarrayToTensor()

Status arrow::py::NdarrayToTensor ( MemoryPool *  pool,
PyObject *  ao,
std::shared_ptr< Tensor > *  out 
)

◆ NumPyDtypeToArrow()

Status arrow::py::NumPyDtypeToArrow ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ PassPyError()

Status arrow::py::PassPyError ( )

◆ ReadSerializedObject()

Status arrow::py::ReadSerializedObject ( io::RandomAccessFile *  src,
SerializedPyObject *  out 
)

Read serialized Python sequence from file interface using Arrow IPC.

Parameters
[in] src: a RandomAccessFile
[out] out: the reconstructed data
Returns
Status

◆ SerializeObject()

Status arrow::py::SerializeObject ( PyObject *  context,
PyObject *  sequence,
SerializedPyObject *  out 
)

Serialize Python sequence as a RecordBatch plus.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] sequence: a Python sequence object to serialize to Arrow data structures
[out] out: the serialized representation
Returns
Status

Release GIL before calling

◆ set_default_memory_pool()

void arrow::py::set_default_memory_pool ( MemoryPool *  pool)

◆ set_numpy_nan()

void arrow::py::set_numpy_nan ( PyObject *  obj)

◆ TensorToNdarray()

Status arrow::py::TensorToNdarray ( const Tensor &  tensor,
PyObject *  base,
PyObject **  out 
)

◆ unwrap_array()

Status arrow::py::unwrap_array ( PyObject *  array,
std::shared_ptr< Array > *  out 
)

◆ unwrap_buffer()

Status arrow::py::unwrap_buffer ( PyObject *  buffer,
std::shared_ptr< Buffer > *  out 
)

◆ unwrap_column()

Status arrow::py::unwrap_column ( PyObject *  column,
std::shared_ptr< Column > *  out 
)

◆ unwrap_data_type()

Status arrow::py::unwrap_data_type ( PyObject *  data_type,
std::shared_ptr< DataType > *  out 
)

◆ unwrap_field()

Status arrow::py::unwrap_field ( PyObject *  field,
std::shared_ptr< Field > *  out 
)

◆ unwrap_record_batch()

Status arrow::py::unwrap_record_batch ( PyObject *  batch,
std::shared_ptr< RecordBatch > *  out 
)

◆ unwrap_schema()

Status arrow::py::unwrap_schema ( PyObject *  schema,
std::shared_ptr< Schema > *  out 
)

◆ unwrap_table()

Status arrow::py::unwrap_table ( PyObject *  table,
std::shared_ptr< Table > *  out 
)

◆ unwrap_tensor()

Status arrow::py::unwrap_tensor ( PyObject *  tensor,
std::shared_ptr< Tensor > *  out 
)

◆ wrap_array()

PyObject* arrow::py::wrap_array ( const std::shared_ptr< Array > &  array)

◆ wrap_buffer()

PyObject* arrow::py::wrap_buffer ( const std::shared_ptr< Buffer > &  buffer)

◆ wrap_column()

PyObject* arrow::py::wrap_column ( const std::shared_ptr< Column > &  column)

◆ wrap_data_type()

PyObject* arrow::py::wrap_data_type ( const std::shared_ptr< DataType > &  type)

◆ wrap_field()

PyObject* arrow::py::wrap_field ( const std::shared_ptr< Field > &  field)

◆ wrap_record_batch()

PyObject* arrow::py::wrap_record_batch ( const std::shared_ptr< RecordBatch > &  batch)

◆ wrap_schema()

PyObject* arrow::py::wrap_schema ( const std::shared_ptr< Schema > &  schema)

◆ wrap_table()

PyObject* arrow::py::wrap_table ( const std::shared_ptr< Table > &  table)

◆ wrap_tensor()

PyObject* arrow::py::wrap_tensor ( const std::shared_ptr< Tensor > &  tensor)

◆ WriteSerializedObject()

Status arrow::py::WriteSerializedObject ( const SerializedPyObject &  object,
io::OutputStream *  dst 
)

Write serialized Python object to OutputStream.

Parameters
[in] object: a serialized Python object to write out
[out] dst: an OutputStream
Returns
Status

Variable Documentation

◆ numpy_nan

PyObject* arrow::py::numpy_nan

◆ PythonFile

class ARROW_NO_EXPORT arrow::py::PythonFile