Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
common.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_PYTHON_COMMON_H
19 #define ARROW_PYTHON_COMMON_H
20 
21 #include <memory>
22 #include <sstream>
23 #include <string>
24 #include <utility>
25 
26 #include "arrow/python/config.h"
27 
28 #include "arrow/buffer.h"
29 #include "arrow/util/macros.h"
30 #include "arrow/util/visibility.h"
31 
32 namespace arrow {
33 
34 class MemoryPool;
35 
36 namespace py {
37 
38 ARROW_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
39 
40 // Catch a pending Python exception and return the corresponding Status.
41 // If no exception is pending, Status::OK() is returned.
43  if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
44  return Status::OK();
45  } else {
46  return ConvertPyError(code);
47  }
48 }
49 
50 ARROW_EXPORT Status PassPyError();
51 
52 // TODO(wesm): We can just let errors pass through. To be explored later
53 #define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError());
54 
55 #define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE));
56 
57 class ARROW_EXPORT PyAcquireGIL {
58  public:
59  PyAcquireGIL() : acquired_gil_(false) { acquire(); }
60 
61  ~PyAcquireGIL() { release(); }
62 
63  void acquire() {
64  if (!acquired_gil_) {
65  state_ = PyGILState_Ensure();
66  acquired_gil_ = true;
67  }
68  }
69 
70  // idempotent
71  void release() {
72  if (acquired_gil_) {
73  PyGILState_Release(state_);
74  acquired_gil_ = false;
75  }
76  }
77 
78  private:
79  bool acquired_gil_;
80  PyGILState_STATE state_;
82 };
83 
84 #define PYARROW_IS_PY2 PY_MAJOR_VERSION <= 2
85 
86 // A RAII primitive that DECREFs the underlying PyObject* when it
87 // goes out of scope.
88 class ARROW_EXPORT OwnedRef {
89  public:
90  OwnedRef() : obj_(NULLPTR) {}
91  OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
92  explicit OwnedRef(PyObject* obj) : obj_(obj) {}
93 
95  obj_ = other.detach();
96  return *this;
97  }
98 
99  ~OwnedRef() { reset(); }
100 
101  void reset(PyObject* obj) {
102  Py_XDECREF(obj_);
103  obj_ = obj;
104  }
105 
106  void reset() { reset(NULLPTR); }
107 
109  PyObject* result = obj_;
110  obj_ = NULLPTR;
111  return result;
112  }
113 
114  PyObject* obj() const { return obj_; }
115 
116  PyObject** ref() { return &obj_; }
117 
118  operator bool() const { return obj_ != NULLPTR; }
119 
120  private:
122 
123  PyObject* obj_;
124 };
125 
126 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
127 // This is for situations where the GIL is not always known to be held
128 // (e.g. if it is released in the middle of a function for performance reasons)
129 class ARROW_EXPORT OwnedRefNoGIL : public OwnedRef {
130  public:
132  OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
133  explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
134 
136  PyAcquireGIL lock;
137  reset();
138  }
139 };
140 
141 // A temporary conversion of a Python object to a bytes area.
142 struct PyBytesView {
143  const char* bytes;
144  Py_ssize_t size;
145 
147 
148  // View the given Python object as binary-like, i.e. bytes or bytearray
149  Status FromBinary(PyObject* obj) { return FromBinary(obj, "a bytes object"); }
150 
152  bool ignored = false;
153  return FromString(obj, false, &ignored);
154  }
155 
156  Status FromString(PyObject* obj, bool* is_utf8) {
157  return FromString(obj, true, is_utf8);
158  }
159 
161 #if PY_MAJOR_VERSION >= 3
162  Py_ssize_t size;
163  // The utf-8 representation is cached on the unicode object
164  const char* data = PyUnicode_AsUTF8AndSize(obj, &size);
166  this->bytes = data;
167  this->size = size;
168  this->ref.reset();
169 #else
170  PyObject* converted = PyUnicode_AsUTF8String(obj);
172  this->bytes = PyBytes_AS_STRING(converted);
173  this->size = PyBytes_GET_SIZE(converted);
174  this->ref.reset(converted);
175 #endif
176  return Status::OK();
177  }
178 
179  protected:
180  PyBytesView(const char* b, Py_ssize_t s, PyObject* obj = NULLPTR)
181  : bytes(b), size(s), ref(obj) {}
182 
183  // View the given Python object as string-like, i.e. str or (utf8) bytes
184  Status FromString(PyObject* obj, bool check_utf8, bool* is_utf8) {
185  if (PyUnicode_Check(obj)) {
186  *is_utf8 = true;
187  return FromUnicode(obj);
188  } else {
189  ARROW_RETURN_NOT_OK(FromBinary(obj, "a string or bytes object"));
190  if (check_utf8) {
191  // Check the bytes are valid utf-8
192  OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
193  if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
194  *is_utf8 = true;
195  } else {
196  *is_utf8 = false;
197  PyErr_Clear();
198  }
199  } else {
200  *is_utf8 = false;
201  }
202  return Status::OK();
203  }
204  }
205 
206  Status FromBinary(PyObject* obj, const char* expected_msg) {
207  if (PyBytes_Check(obj)) {
208  this->bytes = PyBytes_AS_STRING(obj);
209  this->size = PyBytes_GET_SIZE(obj);
210  this->ref.reset();
211  return Status::OK();
212  } else if (PyByteArray_Check(obj)) {
213  this->bytes = PyByteArray_AS_STRING(obj);
214  this->size = PyByteArray_GET_SIZE(obj);
215  this->ref.reset();
216  return Status::OK();
217  } else {
218  std::stringstream ss;
219  ss << "Expected " << expected_msg << ", got a '" << Py_TYPE(obj)->tp_name
220  << "' object";
221  return Status::TypeError(ss.str());
222  }
223  }
224 
226 };
227 
228 // Set / return the common PyArrow memory pool
229 ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);
230 ARROW_EXPORT MemoryPool* get_memory_pool();
231 
232 class ARROW_EXPORT PyBuffer : public Buffer {
233  public:
236  ~PyBuffer();
237 
238  static Status FromPyObject(PyObject* obj, std::shared_ptr<Buffer>* out);
239 
240  private:
241  PyBuffer();
242  Status Init(PyObject*);
243 
244  Py_buffer py_buf_;
245 };
246 
247 } // namespace py
248 } // namespace arrow
249 
250 #endif // ARROW_PYTHON_COMMON_H
OwnedRefNoGIL(OwnedRefNoGIL &&other)
Definition: common.h:132
Py_ssize_t size
Definition: common.h:144
OwnedRefNoGIL()
Definition: common.h:131
#define NULLPTR
Definition: macros.h:69
PyBytesView(const char *b, Py_ssize_t s, PyObject *obj=NULLPTR)
Definition: common.h:180
OwnedRefNoGIL(PyObject *obj)
Definition: common.h:133
void reset()
Definition: common.h:106
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
PyBytesView()
Definition: common.h:146
Status FromString(PyObject *obj)
Definition: common.h:151
OwnedRef()
Definition: common.h:90
StatusCode
Definition: status.h:70
const char * bytes
Definition: common.h:143
~OwnedRef()
Definition: common.h:99
PyObject * detach()
Definition: common.h:108
#define RETURN_IF_PYERROR()
Definition: common.h:53
PyObject * obj() const
Definition: common.h:114
Definition: status.h:95
void acquire()
Definition: common.h:63
void release()
Definition: common.h:71
Status FromBinary(PyObject *obj)
Definition: common.h:149
Definition: common.h:142
#define ARROW_PREDICT_TRUE(x)
Definition: macros.h:49
Status FromUnicode(PyObject *obj)
Definition: common.h:160
Status CheckPyError(StatusCode code=StatusCode::UnknownError)
Definition: common.h:42
static Status OK()
Definition: status.h:124
OwnedRef(OwnedRef &&other)
Definition: common.h:91
MemoryPool * get_memory_pool()
Definition: common.h:232
void reset(PyObject *obj)
Definition: common.h:101
static Status TypeError(const std::string &msg)
Definition: status.h:138
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status ConvertPyError(StatusCode code=StatusCode::UnknownError)
OwnedRef(PyObject *obj)
Definition: common.h:92
~OwnedRefNoGIL()
Definition: common.h:135
PyObject ** ref()
Definition: common.h:116
void set_default_memory_pool(MemoryPool *pool)
PyAcquireGIL()
Definition: common.h:59
Status FromString(PyObject *obj, bool check_utf8, bool *is_utf8)
Definition: common.h:184
Status FromString(PyObject *obj, bool *is_utf8)
Definition: common.h:156
Definition: common.h:57
OwnedRef & operator=(OwnedRef &&other)
Definition: common.h:94
#define ARROW_RETURN_NOT_OK(s)
Definition: status.h:44
OwnedRef ref
Definition: common.h:225
Status FromBinary(PyObject *obj, const char *expected_msg)
Definition: common.h:206
Definition: common.h:129
Base class for memory allocation.
Definition: memory_pool.h:34
~PyAcquireGIL()
Definition: common.h:61
_object PyObject
Definition: serialize.h:30
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:50
Definition: common.h:88
Status PassPyError()