Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
io.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef PYARROW_IO_H
19 #define PYARROW_IO_H
20 
21 #include <memory>
22 
23 #include "arrow/io/interfaces.h"
24 #include "arrow/io/memory.h"
25 #include "arrow/util/visibility.h"
26 
27 #include "arrow/python/config.h"
28 
29 #include "arrow/python/common.h"
30 
31 namespace arrow {
32 
33 class MemoryPool;
34 
35 namespace py {
36 
38 
39 class ARROW_EXPORT PyReadableFile : public io::RandomAccessFile {
40  public:
41  explicit PyReadableFile(PyObject* file);
42  ~PyReadableFile() override;
43 
44  Status Close() override;
45 
46  Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override;
47  Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
48 
49  // Thread-safe version
50  Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read,
51  void* out) override;
52 
53  // Thread-safe version
54  Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr<Buffer>* out) override;
55 
56  Status GetSize(int64_t* size) override;
57 
58  Status Seek(int64_t position) override;
59 
60  Status Tell(int64_t* position) const override;
61 
62  bool supports_zero_copy() const override;
63 
64  private:
65  std::unique_ptr<PythonFile> file_;
66 };
67 
68 class ARROW_EXPORT PyOutputStream : public io::OutputStream {
69  public:
70  explicit PyOutputStream(PyObject* file);
71  ~PyOutputStream() override;
72 
73  Status Close() override;
74  Status Tell(int64_t* position) const override;
75  Status Write(const void* data, int64_t nbytes) override;
76 
77  private:
78  std::unique_ptr<PythonFile> file_;
79  int64_t position_;
80 };
81 
82 // TODO(wesm): seekable output files
83 
84 // A Buffer subclass that keeps a PyObject reference throughout its
85 // lifetime, such that the Python object is kept alive as long as the
86 // C++ buffer is still needed.
87 // Keeping the reference in a Python wrapper would be incorrect as
88 // the Python wrapper can get destroyed even though the wrapped C++
89 // buffer is still alive (ARROW-2270).
90 class ARROW_EXPORT PyForeignBuffer : public Buffer {
91  public:
92  static Status Make(const uint8_t* data, int64_t size, PyObject* base,
93  std::shared_ptr<Buffer>* out);
94 
95  private:
96  PyForeignBuffer(const uint8_t* data, int64_t size, PyObject* base)
97  : Buffer(data, size) {
98  Py_INCREF(base);
99  base_.reset(base);
100  }
101 
102  OwnedRefNoGIL base_;
103 };
104 
105 } // namespace py
106 } // namespace arrow
107 
108 #endif // PYARROW_IO_H
Definition: interfaces.h:111
class ARROW_NO_EXPORT PythonFile
Definition: io.h:37
Definition: interfaces.h:127
Definition: io.h:39
Definition: status.h:95
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
#define ARROW_NO_EXPORT
Definition: visibility.h:42
Definition: common.h:129
_object PyObject
Definition: serialize.h:30
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:50
Definition: io.h:68
Definition: io.h:90
::arrow::Buffer Buffer
Definition: memory.h:54