Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
arrow_to_pandas.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Functions for converting between pandas's NumPy-based data representation
19 // and Arrow data structures
20 
21 #ifndef ARROW_PYTHON_ADAPTERS_PANDAS_H
22 #define ARROW_PYTHON_ADAPTERS_PANDAS_H
23 
24 #include "arrow/python/platform.h"
25 
26 #include <memory>
27 #include <string>
28 #include <unordered_set>
29 
30 #include "arrow/util/visibility.h"
31 
32 namespace arrow {
33 
34 class Array;
35 class Column;
36 class DataType;
37 class MemoryPool;
38 class Status;
39 class Table;
40 
41 namespace py {
42 
// Options passed (by value) to the Convert*ToPandas functions declared in
// this header. Every flag is initialized to false by the default constructor.
//
// NOTE(review): the scraped listing dropped the member declarations and the
// constructor name; they are restored here from the page's cross-reference
// entries (members at listing lines 45-47, constructor at line 49).
struct PandasOptions {
  /// If true, we will convert all string columns to categoricals.
  bool strings_to_categorical;
  // NOTE(review): semantics not visible in this header -- by its name,
  // presumably restricts conversion to cases needing no copy; confirm.
  bool zero_copy_only;
  // NOTE(review): semantics not visible in this header -- by its name,
  // presumably concerns integer columns that contain nulls; confirm.
  bool integer_object_nulls;

  PandasOptions()
      : strings_to_categorical(false),
        zero_copy_only(false),
        integer_object_nulls(false) {}
};
54 
// Declaration only (exported via ARROW_EXPORT); the definition is not in
// this header. Per its name and this header's stated purpose, converts a
// single Arrow Array into a pandas/NumPy-based Python object.
//
//   options - conversion flags, taken by value
//   arr     - the Arrow array to convert
//   py_ref  - NOTE(review): role not visible here; presumably a Python
//             object tied to the lifetime of the underlying data -- confirm
//   out     - presumably receives the resulting Python object -- confirm
//
// Returns a Status.
55 ARROW_EXPORT
56 Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr<Array>& arr,
57  PyObject* py_ref, PyObject** out);
58 
// Declaration only (exported via ARROW_EXPORT); the definition is not in
// this header. Per its name and this header's stated purpose, converts a
// single Arrow Column into a pandas/NumPy-based Python object.
//
//   options - conversion flags, taken by value
//   col     - the Arrow column to convert
//   py_ref  - NOTE(review): role not visible here; presumably a Python
//             object tied to the lifetime of the underlying data -- confirm
//   out     - presumably receives the resulting Python object -- confirm
//
// Returns a Status.
59 ARROW_EXPORT
60 Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr<Column>& col,
61  PyObject* py_ref, PyObject** out);
62 
63 // Convert a whole table as efficiently as possible to a pandas.DataFrame.
64 //
65 // The returned Python object is a list of tuples consisting of the exact 2D
66 // BlockManager structure of the pandas.DataFrame used as of pandas 0.19.x.
67 //
68 // tuple item: (indices: ndarray[int32], block: ndarray[TYPE, ndim=2])
//
// Declaration only; the definition is not in this header.
//   options  - conversion flags, taken by value
//   table    - the Arrow table to convert
//   nthreads - NOTE(review): presumably the number of threads used for the
//              conversion -- confirm against the implementation
//   pool     - NOTE(review): presumably the MemoryPool used for any
//              allocations made during conversion -- confirm
//   out      - presumably receives the list of tuples described above
// Returns a Status.
69 ARROW_EXPORT
70 Status ConvertTableToPandas(PandasOptions options, const std::shared_ptr<Table>& table,
71  int nthreads, MemoryPool* pool, PyObject** out);
72 
// Overload of ConvertTableToPandas that additionally takes a set of column
// names, categorical_columns. Declaration only; the definition is not in
// this header.
//
// NOTE(review): this scraped listing skips lines 73-76 immediately before
// this declaration, so the original doc comment is missing. Presumably
// categorical_columns names the table columns to convert to pandas
// categoricals -- confirm against the upstream header. The remaining
// parameters match the other ConvertTableToPandas overload.
77 ARROW_EXPORT
78 Status ConvertTableToPandas(PandasOptions options,
79  const std::unordered_set<std::string>& categorical_columns,
80  const std::shared_ptr<Table>& table, int nthreads,
81  MemoryPool* pool, PyObject** out);
82 
83 } // namespace py
84 } // namespace arrow
85 
86 #endif // ARROW_PYTHON_ADAPTERS_PANDAS_H
arrow::Status
Definition: status.h:93
bool zero_copy_only
Definition: arrow_to_pandas.h:46
arrow::py::PandasOptions
Definition: arrow_to_pandas.h:43
PandasOptions()
Definition: arrow_to_pandas.h:49
Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr< Column > &col, PyObject *py_ref, PyObject **out)
bool strings_to_categorical
If true, we will convert all string columns to categoricals.
Definition: arrow_to_pandas.h:45
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr< Array > &arr, PyObject *py_ref, PyObject **out)
Base class for memory allocation.
Definition: memory_pool.h:34
Status ConvertTableToPandas(PandasOptions options, const std::shared_ptr< Table > &table, int nthreads, MemoryPool *pool, PyObject **out)
bool integer_object_nulls
Definition: arrow_to_pandas.h:47