Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
arrow_to_pandas.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Functions for converting between pandas's NumPy-based data representation
19 // and Arrow data structures
20 
21 #ifndef ARROW_PYTHON_ADAPTERS_PANDAS_H
22 #define ARROW_PYTHON_ADAPTERS_PANDAS_H
23 
24 #include "arrow/python/platform.h"
25 
26 #include <memory>
27 #include <string>
28 #include <unordered_set>
29 
30 #include "arrow/util/visibility.h"
31 
32 namespace arrow {
33 
// Forward declarations of Arrow core types. They let the signatures below
// name these types (by pointer or inside std::shared_ptr) without this header
// including their full definitions.
34 class Array;
35 class ChunkedArray;
36 class Column;
37 class DataType;
38 class MemoryPool;
39 class Status;
40 class Table;
41 
42 namespace py {
43 
/// Flags accepted by the Convert*ToPandas functions declared in this header.
///
/// Every flag is initialized to false by the default constructor.
struct PandasOptions {
  /// If true, we will convert all string columns to categoricals.
  bool strings_to_categorical;
  // NOTE(review): the listing carries no description for the remaining
  // flags; confirm their exact semantics against the conversion code.
  bool zero_copy_only;
  bool integer_object_nulls;
  bool date_as_object;
  bool use_threads;

  /// Construct an options object with all flags switched off.
  PandasOptions()
      : strings_to_categorical(false),
        zero_copy_only(false),
        integer_object_nulls(false),
        date_as_object(false),
        use_threads(false) {}
};
59 
// Convert a single Arrow Array to a Python object, reporting success or
// failure through the returned Status. Declaration only; the definition is
// not part of this header.
//
// options: conversion flags (see PandasOptions), passed by value.
// arr:     the array to convert.
// py_ref:  NOTE(review): presumably a Python object tied to the lifetime of
//          the result's underlying memory -- confirm against the definition.
// out:     output parameter; presumably receives the resulting Python object
//          -- confirm reference ownership against the definition.
60 ARROW_EXPORT
61 Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr<Array>& arr,
62  PyObject* py_ref, PyObject** out);
63 
// Convert an Arrow ChunkedArray to a Python object, reporting success or
// failure through the returned Status. Declaration only; the definition is
// not part of this header.
//
// options: conversion flags (see PandasOptions), passed by value.
// col:     the chunked array to convert.
// py_ref:  NOTE(review): presumably a Python object tied to the lifetime of
//          the result's underlying memory -- confirm against the definition.
// out:     output parameter; presumably receives the resulting Python object
//          -- confirm reference ownership against the definition.
64 ARROW_EXPORT
65 Status ConvertChunkedArrayToPandas(PandasOptions options,
66  const std::shared_ptr<ChunkedArray>& col,
67  PyObject* py_ref, PyObject** out);
68 
// Convert an Arrow Column to a Python object, reporting success or failure
// through the returned Status. Declaration only; the definition is not part
// of this header.
//
// options: conversion flags (see PandasOptions), passed by value.
// col:     the column to convert.
// py_ref:  NOTE(review): presumably a Python object tied to the lifetime of
//          the result's underlying memory -- confirm against the definition.
// out:     output parameter; presumably receives the resulting Python object
//          -- confirm reference ownership against the definition.
69 ARROW_EXPORT
70 Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr<Column>& col,
71  PyObject* py_ref, PyObject** out);
72 
73 // Convert a whole table as efficiently as possible to a pandas.DataFrame.
74 //
75 // The returned Python object is a list of tuples consisting of the exact 2D
76 // BlockManager structure of the pandas.DataFrame used as of pandas 0.19.x.
77 //
78 // tuple item: (indices: ndarray[int32], block: ndarray[TYPE, ndim=2])
//
// options: conversion flags (see PandasOptions), passed by value.
// table:   the table to convert.
// pool:    NOTE(review): presumably the MemoryPool used for allocations made
//          during conversion -- confirm against the definition.
// out:     output parameter that receives the list of tuples described above.
// Returns a Status describing success or failure.
79 ARROW_EXPORT
80 Status ConvertTableToPandas(PandasOptions options, const std::shared_ptr<Table>& table,
81  MemoryPool* pool, PyObject** out);
82 
// Overload of ConvertTableToPandas that additionally takes a set of column
// names, categorical_columns.
//
// NOTE(review): this listing skips lines 83-86, where the original doc
// comment presumably lived. Judging by the parameter name, the named columns
// are presumably converted to pandas categoricals -- confirm against the
// definition. The remaining parameters match the overload without
// categorical_columns.
87 ARROW_EXPORT
88 Status ConvertTableToPandas(PandasOptions options,
89  const std::unordered_set<std::string>& categorical_columns,
90  const std::shared_ptr<Table>& table, MemoryPool* pool,
91  PyObject** out);
92 
93 } // namespace py
94 } // namespace arrow
95 
96 #endif // ARROW_PYTHON_ADAPTERS_PANDAS_H
arrow::Status
Definition: status.h:95
bool zero_copy_only
Definition: arrow_to_pandas.h:47
arrow::py::PandasOptions
Definition: arrow_to_pandas.h:44
bool date_as_object
Definition: arrow_to_pandas.h:49
PandasOptions()
Definition: arrow_to_pandas.h:52
Status ConvertColumnToPandas(PandasOptions options, const std::shared_ptr< Column > &col, PyObject *py_ref, PyObject **out)
bool strings_to_categorical
If true, we will convert all string columns to categoricals.
Definition: arrow_to_pandas.h:46
arrow
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status ConvertArrayToPandas(PandasOptions options, const std::shared_ptr< Array > &arr, PyObject *py_ref, PyObject **out)
arrow::MemoryPool
Base class for memory allocation.
Definition: memory_pool.h:34
Status ConvertChunkedArrayToPandas(PandasOptions options, const std::shared_ptr< ChunkedArray > &col, PyObject *py_ref, PyObject **out)
bool use_threads
Definition: arrow_to_pandas.h:50
_object PyObject
Definition: serialize.h:30
Status ConvertTableToPandas(PandasOptions options, const std::shared_ptr< Table > &table, MemoryPool *pool, PyObject **out)
bool integer_object_nulls
Definition: arrow_to_pandas.h:48