Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
type_traits.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Internal header
19 
20 #include "arrow/python/platform.h"
21 
22 #include <cstdint>
23 #include <limits>
24 
26 
27 #include <numpy/halffloat.h>
28 
29 #include "arrow/builder.h"
30 #include "arrow/type.h"
31 #include "arrow/util/logging.h"
32 
33 namespace arrow {
34 namespace py {
35 namespace internal {
36 
37 //
38 // Type traits for Numpy -> Arrow equivalence
39 //
// Primary template, deliberately empty. Traits for a concrete NumPy type
// number are supplied by the explicit specializations of this template.
template <int NpyTypeNum>
struct npy_traits {};
42 
43 template <>
44 struct npy_traits<NPY_BOOL> {
45  typedef uint8_t value_type;
46  using TypeClass = BooleanType;
47  using BuilderClass = BooleanBuilder;
48 
49  static constexpr bool supports_nulls = false;
50  static inline bool isnull(uint8_t v) { return false; }
51 };
52 
53 #define NPY_INT_DECL(TYPE, CapType, T) \
54  template <> \
55  struct npy_traits<NPY_##TYPE> { \
56  typedef T value_type; \
57  using TypeClass = CapType##Type; \
58  using BuilderClass = CapType##Builder; \
59  \
60  static constexpr bool supports_nulls = false; \
61  static inline bool isnull(T v) { return false; } \
62  };
63 
64 NPY_INT_DECL(INT8, Int8, int8_t);
65 NPY_INT_DECL(INT16, Int16, int16_t);
66 NPY_INT_DECL(INT32, Int32, int32_t);
67 NPY_INT_DECL(INT64, Int64, int64_t);
68 
69 NPY_INT_DECL(UINT8, UInt8, uint8_t);
70 NPY_INT_DECL(UINT16, UInt16, uint16_t);
71 NPY_INT_DECL(UINT32, UInt32, uint32_t);
72 NPY_INT_DECL(UINT64, UInt64, uint64_t);
73 
74 #if !NPY_INT32_IS_INT && NPY_BITSOF_INT == 32
75 NPY_INT_DECL(INT, Int32, int32_t);
76 NPY_INT_DECL(UINT, UInt32, uint32_t);
77 #endif
78 #if !NPY_INT64_IS_LONG_LONG && NPY_BITSOF_LONGLONG == 64
79 NPY_INT_DECL(LONGLONG, Int64, int64_t);
80 NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
81 #endif
82 
83 template <>
84 struct npy_traits<NPY_FLOAT16> {
85  typedef npy_half value_type;
86  using TypeClass = HalfFloatType;
87  using BuilderClass = HalfFloatBuilder;
88 
89  static constexpr bool supports_nulls = true;
90 
91  static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; }
92 };
93 
94 template <>
95 struct npy_traits<NPY_FLOAT32> {
96  typedef float value_type;
97  using TypeClass = FloatType;
98  using BuilderClass = FloatBuilder;
99 
100  static constexpr bool supports_nulls = true;
101 
102  static inline bool isnull(float v) { return v != v; }
103 };
104 
105 template <>
106 struct npy_traits<NPY_FLOAT64> {
107  typedef double value_type;
108  using TypeClass = DoubleType;
109  using BuilderClass = DoubleBuilder;
110 
111  static constexpr bool supports_nulls = true;
112 
113  static inline bool isnull(double v) { return v != v; }
114 };
115 
116 template <>
117 struct npy_traits<NPY_DATETIME> {
118  typedef int64_t value_type;
119  using TypeClass = TimestampType;
120  using BuilderClass = TimestampBuilder;
121 
122  static constexpr bool supports_nulls = true;
123 
124  static inline bool isnull(int64_t v) {
125  // NaT = -2**63
126  // = -0x8000000000000000
127  // = -9223372036854775808;
128  // = std::numeric_limits<int64_t>::min()
129  return v == std::numeric_limits<int64_t>::min();
130  }
131 };
132 
133 template <>
134 struct npy_traits<NPY_OBJECT> {
135  typedef PyObject* value_type;
136  static constexpr bool supports_nulls = true;
137 
138  static inline bool isnull(PyObject* v) { return v == Py_None; }
139 };
140 
141 //
142 // Type traits for Arrow -> Numpy equivalence
143 // Note: *supports_nulls* means the equivalent NumPy type supports nulls
144 //
// Primary template, deliberately empty. Traits for a concrete Arrow type id
// are supplied by the explicit specializations of this template.
template <int ArrowTypeId>
struct arrow_traits {};
147 
148 template <>
149 struct arrow_traits<Type::BOOL> {
150  static constexpr int npy_type = NPY_BOOL;
151  static constexpr bool supports_nulls = false;
152 };
153 
154 #define INT_DECL(TYPE) \
155  template <> \
156  struct arrow_traits<Type::TYPE> { \
157  static constexpr int npy_type = NPY_##TYPE; \
158  static constexpr bool supports_nulls = false; \
159  static constexpr double na_value = NAN; \
160  typedef typename npy_traits<NPY_##TYPE>::value_type T; \
161  };
162 
163 INT_DECL(INT8);
164 INT_DECL(INT16);
165 INT_DECL(INT32);
166 INT_DECL(INT64);
167 INT_DECL(UINT8);
168 INT_DECL(UINT16);
169 INT_DECL(UINT32);
170 INT_DECL(UINT64);
171 
172 template <>
173 struct arrow_traits<Type::HALF_FLOAT> {
174  static constexpr int npy_type = NPY_FLOAT16;
175  static constexpr bool supports_nulls = true;
176  static constexpr uint16_t na_value = NPY_HALF_NAN;
177  typedef typename npy_traits<NPY_FLOAT16>::value_type T;
178 };
179 
180 template <>
181 struct arrow_traits<Type::FLOAT> {
182  static constexpr int npy_type = NPY_FLOAT32;
183  static constexpr bool supports_nulls = true;
184  static constexpr float na_value = NAN;
185  typedef typename npy_traits<NPY_FLOAT32>::value_type T;
186 };
187 
188 template <>
189 struct arrow_traits<Type::DOUBLE> {
190  static constexpr int npy_type = NPY_FLOAT64;
191  static constexpr bool supports_nulls = true;
192  static constexpr double na_value = NAN;
193  typedef typename npy_traits<NPY_FLOAT64>::value_type T;
194 };
195 
// Sentinel written for a missing timestamp: the smallest representable
// int64_t (-2**63).
static constexpr int64_t kPandasTimestampNull =
    (std::numeric_limits<int64_t>::min)();

// 86400 seconds per day, times 10^9 nanoseconds per second.
constexpr int64_t kNanosecondsInDay = int64_t{86400} * 1000 * 1000 * 1000;
199 
200 template <>
201 struct arrow_traits<Type::TIMESTAMP> {
202  static constexpr int npy_type = NPY_DATETIME;
203  static constexpr int64_t npy_shift = 1;
204 
205  static constexpr bool supports_nulls = true;
206  static constexpr int64_t na_value = kPandasTimestampNull;
207  typedef typename npy_traits<NPY_DATETIME>::value_type T;
208 };
209 
210 template <>
211 struct arrow_traits<Type::DATE32> {
212  // Data stores as FR_D day unit
213  static constexpr int npy_type = NPY_DATETIME;
214  static constexpr int64_t npy_shift = 1;
215 
216  static constexpr bool supports_nulls = true;
217  typedef typename npy_traits<NPY_DATETIME>::value_type T;
218 
219  static constexpr int64_t na_value = kPandasTimestampNull;
220  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
221 };
222 
223 template <>
224 struct arrow_traits<Type::DATE64> {
225  // Data stores as FR_D day unit
226  static constexpr int npy_type = NPY_DATETIME;
227 
228  // There are 1000 * 60 * 60 * 24 = 86400000ms in a day
229  static constexpr int64_t npy_shift = 86400000;
230 
231  static constexpr bool supports_nulls = true;
232  typedef typename npy_traits<NPY_DATETIME>::value_type T;
233 
234  static constexpr int64_t na_value = kPandasTimestampNull;
235  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
236 };
237 
238 template <>
239 struct arrow_traits<Type::TIME32> {
240  static constexpr int npy_type = NPY_OBJECT;
241  static constexpr bool supports_nulls = true;
242  static constexpr int64_t na_value = kPandasTimestampNull;
243  typedef typename npy_traits<NPY_DATETIME>::value_type T;
244 };
245 
246 template <>
247 struct arrow_traits<Type::TIME64> {
248  static constexpr int npy_type = NPY_OBJECT;
249  static constexpr bool supports_nulls = true;
250  typedef typename npy_traits<NPY_DATETIME>::value_type T;
251 };
252 
253 template <>
254 struct arrow_traits<Type::STRING> {
255  static constexpr int npy_type = NPY_OBJECT;
256  static constexpr bool supports_nulls = true;
257 };
258 
259 template <>
260 struct arrow_traits<Type::BINARY> {
261  static constexpr int npy_type = NPY_OBJECT;
262  static constexpr bool supports_nulls = true;
263 };
264 
265 static inline int NumPyTypeSize(int npy_type) {
266  npy_type = fix_numpy_type_num(npy_type);
267 
268  switch (npy_type) {
269  case NPY_BOOL:
270  case NPY_INT8:
271  case NPY_UINT8:
272  return 1;
273  case NPY_INT16:
274  case NPY_UINT16:
275  return 2;
276  case NPY_INT32:
277  case NPY_UINT32:
278  return 4;
279  case NPY_INT64:
280  case NPY_UINT64:
281  return 8;
282  case NPY_FLOAT16:
283  return 2;
284  case NPY_FLOAT32:
285  return 4;
286  case NPY_FLOAT64:
287  return 8;
288  case NPY_DATETIME:
289  return 8;
290  case NPY_OBJECT:
291  return sizeof(void*);
292  default:
293  DCHECK(false) << "unhandled numpy type";
294  break;
295  }
296  return -1;
297 }
298 
299 } // namespace internal
300 } // namespace py
301 } // namespace arrow
NumericBuilder< FloatType > FloatBuilder
Definition: builder.h:440
#define NPY_INT_DECL(TYPE, CapType, T)
Definition: type_traits.h:53
NumericBuilder< HalfFloatType > HalfFloatBuilder
Definition: builder.h:439
_object PyObject
Definition: python_to_arrow.h:30
#define INT_DECL(TYPE)
Definition: type_traits.h:154
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
int fix_numpy_type_num(int type_num)
Definition: numpy_interop.h:84
#define DCHECK(condition)
Definition: logging.h:93
NumericBuilder< DoubleType > DoubleBuilder
Definition: builder.h:441
NumericBuilder< TimestampType > TimestampBuilder
Definition: builder.h:433