Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
common.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_PYTHON_COMMON_H
19 #define ARROW_PYTHON_COMMON_H
20 
21 #include <memory>
22 #include <sstream>
23 #include <string>
24 #include <utility>
25 
26 #include "arrow/python/config.h"
27 
28 #include "arrow/buffer.h"
29 #include "arrow/util/macros.h"
30 #include "arrow/util/visibility.h"
31 
32 namespace arrow {
33 
34 class MemoryPool;
35 
36 namespace py {
37 
38 ARROW_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
39 
40 // Catch a pending Python exception and return the corresponding Status.
41 // If no exception is pending, Status::OK() is returned.
43  if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
44  return Status::OK();
45  } else {
46  return ConvertPyError(code);
47  }
48 }
49 
50 ARROW_EXPORT Status PassPyError();
51 
52 // TODO(wesm): We can just let errors pass through. To be explored later
53 #define RETURN_IF_PYERROR() RETURN_NOT_OK(CheckPyError());
54 
55 #define PY_RETURN_IF_ERROR(CODE) RETURN_NOT_OK(CheckPyError(CODE));
56 
57 class ARROW_EXPORT PyAcquireGIL {
58  public:
59  PyAcquireGIL() : acquired_gil_(false) { acquire(); }
60 
61  ~PyAcquireGIL() { release(); }
62 
63  void acquire() {
64  if (!acquired_gil_) {
65  state_ = PyGILState_Ensure();
66  acquired_gil_ = true;
67  }
68  }
69 
70  // idempotent
71  void release() {
72  if (acquired_gil_) {
73  PyGILState_Release(state_);
74  acquired_gil_ = false;
75  }
76  }
77 
78  private:
79  bool acquired_gil_;
80  PyGILState_STATE state_;
82 };
83 
// True when building against Python 2 (or older). The expansion is
// parenthesized so it composes safely with surrounding operators, e.g.
// `!PYARROW_IS_PY2` or `PYARROW_IS_PY2 && x`.
#define PYARROW_IS_PY2 (PY_MAJOR_VERSION <= 2)
85 
86 // A RAII primitive that DECREFs the underlying PyObject* when it
87 // goes out of scope.
88 class ARROW_EXPORT OwnedRef {
89  public:
90  OwnedRef() : obj_(NULLPTR) {}
91  OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
92  explicit OwnedRef(PyObject* obj) : obj_(obj) {}
93 
95  obj_ = other.detach();
96  return *this;
97  }
98 
99  ~OwnedRef() { reset(); }
100 
101  void reset(PyObject* obj) {
102  Py_XDECREF(obj_);
103  obj_ = obj;
104  }
105 
106  void reset() { reset(NULLPTR); }
107 
109  PyObject* result = obj_;
110  obj_ = NULLPTR;
111  return result;
112  }
113 
114  PyObject* obj() const { return obj_; }
115 
116  PyObject** ref() { return &obj_; }
117 
118  operator bool() const { return obj_ != NULLPTR; }
119 
120  private:
122 
123  PyObject* obj_;
124 };
125 
126 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
127 // This is for situations where the GIL is not always known to be held
128 // (e.g. if it is released in the middle of a function for performance reasons)
129 class ARROW_EXPORT OwnedRefNoGIL : public OwnedRef {
130  public:
132  OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
133  explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
134 
136  PyAcquireGIL lock;
137  reset();
138  }
139 };
140 
141 // A temporary conversion of a Python object to a bytes area.
142 struct ARROW_EXPORT PyBytesView {
143  const char* bytes;
144  Py_ssize_t size;
145 
146  PyBytesView() : bytes(NULLPTR), size(0), ref(NULLPTR) {}
147 
148  // View the given Python object as binary-like, i.e. bytes
149  Status FromBinary(PyObject* obj) { return FromBinary(obj, "a bytes object"); }
150 
151  // View the given Python object as string-like, i.e. str or (utf8) bytes
152  Status FromString(PyObject* obj, bool check_valid = false) {
153  if (PyUnicode_Check(obj)) {
154 #if PY_MAJOR_VERSION >= 3
155  Py_ssize_t size;
156  // The utf-8 representation is cached on the unicode object
157  const char* data = PyUnicode_AsUTF8AndSize(obj, &size);
159  this->bytes = data;
160  this->size = size;
161  this->ref.reset();
162  return Status::OK();
163 #else
164  PyObject* converted = PyUnicode_AsUTF8String(obj);
166  this->bytes = PyBytes_AS_STRING(converted);
167  this->size = PyBytes_GET_SIZE(converted);
168  this->ref.reset(converted);
169  return Status::OK();
170 #endif
171  } else {
172  RETURN_NOT_OK(FromBinary(obj, "a string or bytes object"));
173  if (check_valid) {
174  // Check the bytes are valid utf-8
175  OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
177  }
178  return Status::OK();
179  }
180  }
181 
182  protected:
183  PyBytesView(const char* b, Py_ssize_t s, PyObject* obj = NULLPTR)
184  : bytes(b), size(s), ref(obj) {}
185 
186  Status FromBinary(PyObject* obj, const char* expected_msg) {
187  if (PyBytes_Check(obj)) {
188  this->bytes = PyBytes_AS_STRING(obj);
189  this->size = PyBytes_GET_SIZE(obj);
190  this->ref.reset();
191  return Status::OK();
192  } else if (PyByteArray_Check(obj)) {
193  this->bytes = PyByteArray_AS_STRING(obj);
194  this->size = PyByteArray_GET_SIZE(obj);
195  this->ref.reset();
196  return Status::OK();
197  } else {
198  std::stringstream ss;
199  ss << "Expected " << expected_msg << ", got a '" << Py_TYPE(obj)->tp_name
200  << "' object";
201  return Status::TypeError(ss.str());
202  }
203  }
204 
206 };
207 
208 // Return the common PyArrow memory pool
209 ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);
210 ARROW_EXPORT MemoryPool* get_memory_pool();
211 
212 class ARROW_EXPORT PyBuffer : public Buffer {
213  public:
216  ~PyBuffer();
217 
218  static Status FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out);
219 
220  private:
221  PyBuffer();
222  Status Init(PyObject*);
223 
224  Py_buffer py_buf_;
225 };
226 
227 } // namespace py
228 } // namespace arrow
229 
230 #endif // ARROW_PYTHON_COMMON_H
OwnedRefNoGIL(OwnedRefNoGIL &&other)
Definition: common.h:132
Py_ssize_t size
Definition: common.h:144
OwnedRefNoGIL()
Definition: common.h:131
PyBytesView(const char *b, Py_ssize_t s, PyObject *obj=NULLPTR)
Definition: common.h:183
OwnedRefNoGIL(PyObject *obj)
Definition: common.h:133
void reset()
Definition: common.h:106
PyBytesView()
Definition: common.h:146
#define ARROW_PREDICT_TRUE(x)
Definition: macros.h:49
OwnedRef()
Definition: common.h:90
#define NULLPTR
Definition: macros.h:69
StatusCode
Definition: status.h:71
const char * bytes
Definition: common.h:143
~OwnedRef()
Definition: common.h:99
PyObject * detach()
Definition: common.h:108
#define RETURN_IF_PYERROR()
Definition: common.h:53
PyObject * obj() const
Definition: common.h:114
Definition: status.h:94
void acquire()
Definition: common.h:63
void release()
Definition: common.h:71
Status FromBinary(PyObject *obj)
Definition: common.h:149
Definition: common.h:142
Status CheckPyError(StatusCode code=StatusCode::UnknownError)
Definition: common.h:42
#define RETURN_NOT_OK(s)
Definition: status.h:44
static Status OK()
Definition: status.h:123
OwnedRef(OwnedRef &&other)
Definition: common.h:91
_object PyObject
Definition: python_to_arrow.h:30
MemoryPool * get_memory_pool()
Definition: common.h:212
void reset(PyObject *obj)
Definition: common.h:101
static Status TypeError(const std::string &msg)
Definition: status.h:134
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status ConvertPyError(StatusCode code=StatusCode::UnknownError)
OwnedRef(PyObject *obj)
Definition: common.h:92
~OwnedRefNoGIL()
Definition: common.h:135
PyObject ** ref()
Definition: common.h:116
void set_default_memory_pool(MemoryPool *pool)
PyAcquireGIL()
Definition: common.h:59
Definition: common.h:57
OwnedRef & operator=(OwnedRef &&other)
Definition: common.h:94
OwnedRef ref
Definition: common.h:205
Status FromBinary(PyObject *obj, const char *expected_msg)
Definition: common.h:186
Definition: common.h:129
Base class for memory allocation.
Definition: memory_pool.h:34
~PyAcquireGIL()
Definition: common.h:61
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:48
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
Status FromString(PyObject *obj, bool check_valid=false)
Definition: common.h:152
Definition: common.h:88
Status PassPyError()