Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
python_to_arrow.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_PYTHON_PYTHON_TO_ARROW_H
19 #define ARROW_PYTHON_PYTHON_TO_ARROW_H
20 
21 #include <memory>
22 #include <vector>
23 
24 #include "arrow/status.h"
25 #include "arrow/util/visibility.h"
26 
27 // Forward declaring PyObject, see
28 // https://mail.python.org/pipermail/python-dev/2003-August/037601.html
// The forward declaration below is only emitted when the PyObject_HEAD macro
// is not already defined (presumably because the Python headers, which define
// it, were not included first -- TODO confirm). This lets the header name
// PyObject* in signatures without including Python.h itself.
29 #ifndef PyObject_HEAD
30 struct _object;
31 typedef _object PyObject;
32 #endif
33 
34 namespace arrow {
35 
36 class Buffer;
37 class DataType;
38 class MemoryPool;
39 class RecordBatch;
40 class Tensor;
41 
42 namespace io {
43 
44 class OutputStream;
45 
46 } // namespace io
47 
48 namespace py {
49 
/// Container for the pieces of a serialized Python object: a single
/// RecordBatch plus separate lists of Tensors and Buffers. All members are
/// held through shared_ptr, so copies of this struct share the underlying data.
50 struct ARROW_EXPORT SerializedPyObject {
  /// The record batch part of the serialized object.
51  std::shared_ptr<RecordBatch> batch;
  /// Tensors carried alongside `batch`.
52  std::vector<std::shared_ptr<Tensor>> tensors;
  /// Buffers carried alongside `batch`.
53  std::vector<std::shared_ptr<Buffer>> buffers;
54 
  /// Write this object to an output stream.
  /// \param[in] dst destination stream (non-owning pointer)
  /// \return Status
  /// NOTE(review): the on-stream layout is defined by the implementation,
  /// which is not visible in this header -- confirm before relying on it.
58  Status WriteTo(io::OutputStream* dst);
59 
  /// Produce a Python object describing the components of this object.
  /// \param[in] pool memory pool (presumably used for any allocations made
  ///            while building the result -- TODO confirm)
  /// \param[out] out receives a PyObject*; its exact shape and reference
  ///             ownership are not specified here -- confirm against the
  ///             implementation
  /// \return Status
73  Status GetComponents(MemoryPool* pool, PyObject** out);
74 };
75 
/// Serialize Python sequence as a SerializedPyObject.
/// \param[in] context a Python object passed along with the sequence
///            (presumably a serialization context supplying custom
///            handlers -- TODO confirm against the implementation)
/// \param[in] sequence the Python sequence to serialize
/// \param[out] out the SerializedPyObject that receives the result
/// \return Status
88 ARROW_EXPORT
89 Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject* out);
90 
/// Serialize an Arrow Tensor as a SerializedPyObject.
/// \param[in] tensor the Tensor to serialize (shared_ptr taken by value)
/// \param[out] out the SerializedPyObject that receives the result
/// \return Status
95 ARROW_EXPORT
96 Status SerializeTensor(std::shared_ptr<Tensor> tensor, py::SerializedPyObject* out);
97 
/// Write the Tensor metadata header to an OutputStream.
/// \param[in] dtype the DataType of the tensor
/// \param[in] shape the tensor dimensions
/// \param[in] tensor_num_bytes byte count of the tensor (presumably the size
///            of the tensor data that follows the header -- TODO confirm)
/// \param[in] dst the OutputStream to write to (non-owning pointer)
/// \return Status
/// NOTE(review): int64_t is used here, but <cstdint> is not among the includes
/// visible in this header; it appears to rely on a transitive include.
104 ARROW_EXPORT
105 Status WriteTensorHeader(std::shared_ptr<DataType> dtype,
106  const std::vector<int64_t>& shape, int64_t tensor_num_bytes,
107  io::OutputStream* dst);
108 
109 } // namespace py
110 
111 } // namespace arrow
112 
113 #endif // ARROW_PYTHON_PYTHON_TO_ARROW_H
Definition: interfaces.h:111
std::shared_ptr< RecordBatch > batch
Definition: python_to_arrow.h:51
Definition: python_to_arrow.h:50
std::vector< std::shared_ptr< Tensor > > tensors
Definition: python_to_arrow.h:52
std::vector< std::shared_ptr< Buffer > > buffers
Definition: python_to_arrow.h:53
Definition: status.h:94
_object PyObject
Definition: python_to_arrow.h:30
Status SerializeTensor(std::shared_ptr< Tensor > tensor, py::SerializedPyObject *out)
Serialize an Arrow Tensor as a SerializedPyObject.
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status WriteTensorHeader(std::shared_ptr< DataType > dtype, const std::vector< int64_t > &shape, int64_t tensor_num_bytes, io::OutputStream *dst)
Write the Tensor metadata header to an OutputStream.
Base class for memory allocation.
Definition: memory_pool.h:34
Status SerializeObject(PyObject *context, PyObject *sequence, SerializedPyObject *out)
Serialize Python sequence as a SerializedPyObject.