Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
Namespaces | Classes | Functions | Variables
arrow::py Namespace Reference

Namespaces

 benchmark
 

Classes

class  Ndarray1DIndexer
 Indexing convenience for interacting with strided 1-dim ndarray objects. More...
 
class  NumPyBuffer
 
class  OwnedRef
 
class  OwnedRefNoGIL
 
struct  PandasOptions
 
class  PyAcquireGIL
 
class  PyBuffer
 
struct  PyBytesView
 
class  PyForeignBuffer
 
class  PyOutputStream
 
class  PyReadableFile
 
struct  SerializedPyObject
 

Functions

Status ConvertArrayToPandas (PandasOptions options, const std::shared_ptr< Array > &arr, PyObject *py_ref, PyObject **out)
 
Status ConvertColumnToPandas (PandasOptions options, const std::shared_ptr< Column > &col, PyObject *py_ref, PyObject **out)
 
Status ConvertTableToPandas (PandasOptions options, const std::shared_ptr< Table > &table, int nthreads, MemoryPool *pool, PyObject **out)
 
Status ConvertTableToPandas (PandasOptions options, const std::unordered_set< std::string > &categorical_columns, const std::shared_ptr< Table > &table, int nthreads, MemoryPool *pool, PyObject **out)
 Convert a whole table as efficiently as possible to a pandas.DataFrame. More...
 
Status ReadSerializedObject (io::RandomAccessFile *src, SerializedPyObject *out)
 Read serialized Python sequence from file interface using Arrow IPC. More...
 
Status GetSerializedFromComponents (int num_tensors, int num_buffers, PyObject *data, SerializedPyObject *out)
 Reconstruct SerializedPyObject from representation produced by SerializedPyObject::GetComponents. More...
 
Status DeserializeObject (PyObject *context, const SerializedPyObject &object, PyObject *base, PyObject **out)
 Reconstruct Python object from Arrow-serialized representation. More...
 
arrow::Status InferArrowType (PyObject *obj, std::shared_ptr< arrow::DataType > *out_type)
 
arrow::Status InferArrowTypeAndSize (PyObject *obj, int64_t *size, std::shared_ptr< arrow::DataType > *out_type)
 
arrow::Status AppendPySequence (PyObject *obj, int64_t size, const std::shared_ptr< arrow::DataType > &type, arrow::ArrayBuilder *builder)
 
Status ConvertPySequence (PyObject *obj, MemoryPool *pool, std::shared_ptr< Array > *out)
 
Status ConvertPySequence (PyObject *obj, int64_t size, MemoryPool *pool, std::shared_ptr< Array > *out)
 
Status ConvertPySequence (PyObject *obj, const std::shared_ptr< DataType > &type, MemoryPool *pool, std::shared_ptr< Array > *out)
 
Status ConvertPySequence (PyObject *obj, int64_t size, const std::shared_ptr< DataType > &type, MemoryPool *pool, std::shared_ptr< Array > *out)
 
Status InvalidConversion (PyObject *obj, const std::string &expected_type_name, std::ostream *out)
 
Status ConvertPyError (StatusCode code=StatusCode::UnknownError)
 
Status CheckPyError (StatusCode code=StatusCode::UnknownError)
 
Status PassPyError ()
 
void set_default_memory_pool (MemoryPool *pool)
 
MemoryPool * get_memory_pool ()
 
void set_numpy_nan (PyObject *obj)
 
std::shared_ptr< DataType > GetPrimitiveType (Type::type type)
 
PyObject * PyHalf_FromHalf (npy_half value)
 
Status PyFloat_AsHalf (PyObject *obj, npy_half *out)
 
template<typename VISITOR >
Status VisitNumpyArrayInline (PyArrayObject *arr, VISITOR *visitor)
 
int cast_npy_type_compat (int type_num)
 
bool is_contiguous (PyObject *array)
 
Status NumPyDtypeToArrow (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status NumPyDtypeToArrow (PyArray_Descr *descr, std::shared_ptr< DataType > *out)
 
Status GetTensorType (PyObject *dtype, std::shared_ptr< DataType > *out)
 
Status GetNumPyType (const DataType &type, int *type_num)
 
Status NdarrayToTensor (MemoryPool *pool, PyObject *ao, std::shared_ptr< Tensor > *out)
 
Status TensorToNdarray (const std::shared_ptr< Tensor > &tensor, PyObject *base, PyObject **out)
 
int import_numpy ()
 
Status NdarrayToArrow (MemoryPool *pool, PyObject *ao, PyObject *mo, bool use_pandas_null_sentinels, const std::shared_ptr< DataType > &type, std::shared_ptr< ChunkedArray > *out)
 Convert NumPy arrays to Arrow. More...
 
int import_pyarrow ()
 
bool is_buffer (PyObject *buffer)
 
Status unwrap_buffer (PyObject *buffer, std::shared_ptr< Buffer > *out)
 
PyObject * wrap_buffer (const std::shared_ptr< Buffer > &buffer)
 
bool is_data_type (PyObject *data_type)
 
Status unwrap_data_type (PyObject *data_type, std::shared_ptr< DataType > *out)
 
PyObject * wrap_data_type (const std::shared_ptr< DataType > &type)
 
bool is_field (PyObject *field)
 
Status unwrap_field (PyObject *field, std::shared_ptr< Field > *out)
 
PyObject * wrap_field (const std::shared_ptr< Field > &field)
 
bool is_schema (PyObject *schema)
 
Status unwrap_schema (PyObject *schema, std::shared_ptr< Schema > *out)
 
PyObject * wrap_schema (const std::shared_ptr< Schema > &schema)
 
bool is_array (PyObject *array)
 
Status unwrap_array (PyObject *array, std::shared_ptr< Array > *out)
 
PyObject * wrap_array (const std::shared_ptr< Array > &array)
 
bool is_tensor (PyObject *tensor)
 
Status unwrap_tensor (PyObject *tensor, std::shared_ptr< Tensor > *out)
 
PyObject * wrap_tensor (const std::shared_ptr< Tensor > &tensor)
 
bool is_column (PyObject *column)
 
Status unwrap_column (PyObject *column, std::shared_ptr< Column > *out)
 
PyObject * wrap_column (const std::shared_ptr< Column > &column)
 
bool is_table (PyObject *table)
 
Status unwrap_table (PyObject *table, std::shared_ptr< Table > *out)
 
PyObject * wrap_table (const std::shared_ptr< Table > &table)
 
bool is_record_batch (PyObject *batch)
 
Status unwrap_record_batch (PyObject *batch, std::shared_ptr< RecordBatch > *out)
 
PyObject * wrap_record_batch (const std::shared_ptr< RecordBatch > &batch)
 
Status SerializeObject (PyObject *context, PyObject *sequence, SerializedPyObject *out)
 Serialize Python sequence as a RecordBatch plus any associated tensors and buffers. More...
 

Variables

PyObject * numpy_nan
 
class ARROW_NO_EXPORT PythonFile
 

Function Documentation

◆ AppendPySequence()

arrow::Status arrow::py::AppendPySequence ( PyObject *  obj,
int64_t  size,
const std::shared_ptr< arrow::DataType > &  type,
arrow::ArrayBuilder *  builder 
)

◆ cast_npy_type_compat()

int arrow::py::cast_npy_type_compat ( int  type_num)

◆ CheckPyError()

Status arrow::py::CheckPyError ( StatusCode  code = StatusCode::UnknownError)
inline

◆ ConvertArrayToPandas()

Status arrow::py::ConvertArrayToPandas ( PandasOptions  options,
const std::shared_ptr< Array > &  arr,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertColumnToPandas()

Status arrow::py::ConvertColumnToPandas ( PandasOptions  options,
const std::shared_ptr< Column > &  col,
PyObject *  py_ref,
PyObject **  out 
)

◆ ConvertPyError()

Status arrow::py::ConvertPyError ( StatusCode  code = StatusCode::UnknownError)

◆ ConvertPySequence() [1/4]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
MemoryPool *  pool,
std::shared_ptr< Array > *  out 
)

◆ ConvertPySequence() [2/4]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
int64_t  size,
MemoryPool *  pool,
std::shared_ptr< Array > *  out 
)

◆ ConvertPySequence() [3/4]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
const std::shared_ptr< DataType > &  type,
MemoryPool *  pool,
std::shared_ptr< Array > *  out 
)

◆ ConvertPySequence() [4/4]

Status arrow::py::ConvertPySequence ( PyObject *  obj,
int64_t  size,
const std::shared_ptr< DataType > &  type,
MemoryPool *  pool,
std::shared_ptr< Array > *  out 
)

◆ ConvertTableToPandas() [1/2]

Status arrow::py::ConvertTableToPandas ( PandasOptions  options,
const std::shared_ptr< Table > &  table,
int  nthreads,
MemoryPool *  pool,
PyObject **  out 
)

◆ ConvertTableToPandas() [2/2]

Status arrow::py::ConvertTableToPandas ( PandasOptions  options,
const std::unordered_set< std::string > &  categorical_columns,
const std::shared_ptr< Table > &  table,
int  nthreads,
MemoryPool *  pool,
PyObject **  out 
)

Convert a whole table as efficiently as possible to a pandas.DataFrame.

Explicitly name columns that should be categorical. This option is only used on conversions that are applied to a table.

◆ DeserializeObject()

Status arrow::py::DeserializeObject ( PyObject *  context,
const SerializedPyObject &  object,
PyObject *  base,
PyObject **  out 
)

Reconstruct Python object from Arrow-serialized representation.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] object: object to deserialize
[in] base: a Python object holding the underlying data that any NumPy arrays will reference, to avoid premature deallocation
[out] out: the returned object
Returns
Status. This acquires the GIL.

◆ get_memory_pool()

MemoryPool* arrow::py::get_memory_pool ( )

◆ GetNumPyType()

Status arrow::py::GetNumPyType ( const DataType type,
int *  type_num 
)

◆ GetPrimitiveType()

std::shared_ptr<DataType> arrow::py::GetPrimitiveType ( Type::type  type)

◆ GetSerializedFromComponents()

Status arrow::py::GetSerializedFromComponents ( int  num_tensors,
int  num_buffers,
PyObject *  data,
SerializedPyObject *  out 
)

Reconstruct SerializedPyObject from representation produced by SerializedPyObject::GetComponents.

Parameters
[in] num_tensors: number of tensors in the object
[in] num_buffers: number of buffers in the object
[in] data: a list containing pyarrow.Buffer instances. Must be 1 + num_tensors * 2 + num_buffers in length
[out] out: the reconstructed object
Returns
Status

◆ GetTensorType()

Status arrow::py::GetTensorType ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ import_numpy()

int arrow::py::import_numpy ( )
inline

◆ import_pyarrow()

int arrow::py::import_pyarrow ( )

◆ InferArrowType()

arrow::Status arrow::py::InferArrowType ( PyObject *  obj,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ InferArrowTypeAndSize()

arrow::Status arrow::py::InferArrowTypeAndSize ( PyObject *  obj,
int64_t *  size,
std::shared_ptr< arrow::DataType > *  out_type 
)

◆ InvalidConversion()

Status arrow::py::InvalidConversion ( PyObject *  obj,
const std::string &  expected_type_name,
std::ostream *  out 
)

◆ is_array()

bool arrow::py::is_array ( PyObject *  array)

◆ is_buffer()

bool arrow::py::is_buffer ( PyObject *  buffer)

◆ is_column()

bool arrow::py::is_column ( PyObject *  column)

◆ is_contiguous()

bool arrow::py::is_contiguous ( PyObject *  array)

◆ is_data_type()

bool arrow::py::is_data_type ( PyObject *  data_type)

◆ is_field()

bool arrow::py::is_field ( PyObject *  field)

◆ is_record_batch()

bool arrow::py::is_record_batch ( PyObject *  batch)

◆ is_schema()

bool arrow::py::is_schema ( PyObject *  schema)

◆ is_table()

bool arrow::py::is_table ( PyObject *  table)

◆ is_tensor()

bool arrow::py::is_tensor ( PyObject *  tensor)

◆ NdarrayToArrow()

Status arrow::py::NdarrayToArrow ( MemoryPool *  pool,
PyObject *  ao,
PyObject *  mo,
bool  use_pandas_null_sentinels,
const std::shared_ptr< DataType > &  type,
std::shared_ptr< ChunkedArray > *  out 
)

Convert NumPy arrays to Arrow.

If the target data type is not known, pass a null type.

Parameters
[in] pool: Memory pool for any memory allocations
[in] ao: an ndarray with the array data
[in] mo: an ndarray with a null mask (True is null), optional
[in] type: a specific type to cast to, may be null
[out] out: a ChunkedArray, to accommodate chunked output

◆ NdarrayToTensor()

Status arrow::py::NdarrayToTensor ( MemoryPool *  pool,
PyObject *  ao,
std::shared_ptr< Tensor > *  out 
)

◆ NumPyDtypeToArrow() [1/2]

Status arrow::py::NumPyDtypeToArrow ( PyObject *  dtype,
std::shared_ptr< DataType > *  out 
)

◆ NumPyDtypeToArrow() [2/2]

Status arrow::py::NumPyDtypeToArrow ( PyArray_Descr *  descr,
std::shared_ptr< DataType > *  out 
)

◆ PassPyError()

Status arrow::py::PassPyError ( )

◆ PyFloat_AsHalf()

Status arrow::py::PyFloat_AsHalf ( PyObject *  obj,
npy_half *  out 
)

◆ PyHalf_FromHalf()

PyObject* arrow::py::PyHalf_FromHalf ( npy_half  value)

◆ ReadSerializedObject()

Status arrow::py::ReadSerializedObject ( io::RandomAccessFile *  src,
SerializedPyObject *  out 
)

Read serialized Python sequence from file interface using Arrow IPC.

Parameters
[in] src: a RandomAccessFile
[out] out: the reconstructed data
Returns
Status

◆ SerializeObject()

Status arrow::py::SerializeObject ( PyObject *  context,
PyObject *  sequence,
SerializedPyObject *  out 
)

Serialize Python sequence as a RecordBatch plus any associated tensors and buffers.

Parameters
[in] context: Serialization context which contains custom serialization and deserialization callbacks. Can be any Python object with a _serialize_callback method for serialization and a _deserialize_callback method for deserialization. If context is None, no custom serialization will be attempted.
[in] sequence: a Python sequence object to serialize to Arrow data structures
[out] out: the serialized representation
Returns
Status

Release GIL before calling

◆ set_default_memory_pool()

void arrow::py::set_default_memory_pool ( MemoryPool *  pool)

◆ set_numpy_nan()

void arrow::py::set_numpy_nan ( PyObject *  obj)

◆ TensorToNdarray()

Status arrow::py::TensorToNdarray ( const std::shared_ptr< Tensor > &  tensor,
PyObject *  base,
PyObject **  out 
)

◆ unwrap_array()

Status arrow::py::unwrap_array ( PyObject *  array,
std::shared_ptr< Array > *  out 
)

◆ unwrap_buffer()

Status arrow::py::unwrap_buffer ( PyObject *  buffer,
std::shared_ptr< Buffer > *  out 
)

◆ unwrap_column()

Status arrow::py::unwrap_column ( PyObject *  column,
std::shared_ptr< Column > *  out 
)

◆ unwrap_data_type()

Status arrow::py::unwrap_data_type ( PyObject *  data_type,
std::shared_ptr< DataType > *  out 
)

◆ unwrap_field()

Status arrow::py::unwrap_field ( PyObject *  field,
std::shared_ptr< Field > *  out 
)

◆ unwrap_record_batch()

Status arrow::py::unwrap_record_batch ( PyObject *  batch,
std::shared_ptr< RecordBatch > *  out 
)

◆ unwrap_schema()

Status arrow::py::unwrap_schema ( PyObject *  schema,
std::shared_ptr< Schema > *  out 
)

◆ unwrap_table()

Status arrow::py::unwrap_table ( PyObject *  table,
std::shared_ptr< Table > *  out 
)

◆ unwrap_tensor()

Status arrow::py::unwrap_tensor ( PyObject *  tensor,
std::shared_ptr< Tensor > *  out 
)

◆ VisitNumpyArrayInline()

template<typename VISITOR >
Status arrow::py::VisitNumpyArrayInline ( PyArrayObject *  arr,
VISITOR *  visitor 
)
inline

◆ wrap_array()

PyObject* arrow::py::wrap_array ( const std::shared_ptr< Array > &  array)

◆ wrap_buffer()

PyObject* arrow::py::wrap_buffer ( const std::shared_ptr< Buffer > &  buffer)

◆ wrap_column()

PyObject* arrow::py::wrap_column ( const std::shared_ptr< Column > &  column)

◆ wrap_data_type()

PyObject* arrow::py::wrap_data_type ( const std::shared_ptr< DataType > &  type)

◆ wrap_field()

PyObject* arrow::py::wrap_field ( const std::shared_ptr< Field > &  field)

◆ wrap_record_batch()

PyObject* arrow::py::wrap_record_batch ( const std::shared_ptr< RecordBatch > &  batch)

◆ wrap_schema()

PyObject* arrow::py::wrap_schema ( const std::shared_ptr< Schema > &  schema)

◆ wrap_table()

PyObject* arrow::py::wrap_table ( const std::shared_ptr< Table > &  table)

◆ wrap_tensor()

PyObject* arrow::py::wrap_tensor ( const std::shared_ptr< Tensor > &  tensor)

Variable Documentation

◆ numpy_nan

PyObject* arrow::py::numpy_nan

◆ PythonFile

class ARROW_NO_EXPORT arrow::py::PythonFile