Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
type_traits.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Internal header
19 
20 #include "arrow/python/platform.h"
21 
22 #include <cstdint>
23 #include <limits>
24 
26 
27 #include <numpy/halffloat.h>
28 
29 #include "arrow/builder.h"
30 #include "arrow/type.h"
31 #include "arrow/util/logging.h"
32 
33 namespace arrow {
34 namespace py {
35 namespace internal {
36 
/// Compile-time traits keyed by a NumPy type number (an NPY_* constant).
///
/// The primary template carries no members; the explicit specializations
/// declared for individual type numbers provide the actual traits.
template <int TYPE>
struct npy_traits {
};
39 
40 template <>
41 struct npy_traits<NPY_BOOL> {
42  typedef uint8_t value_type;
43  using TypeClass = BooleanType;
44  using BuilderClass = BooleanBuilder;
45 
46  static constexpr bool supports_nulls = false;
47  static inline bool isnull(uint8_t v) { return false; }
48 };
49 
// Stamps out the npy_traits specialization for one integer NumPy type.
//   TYPE    - suffix of the NPY_* type-number constant (e.g. INT8 -> NPY_INT8)
//   CapType - prefix of the Arrow classes (<CapType>Type, <CapType>Builder)
//   T       - C type of a single element
// Integer types have no null sentinel: supports_nulls is false and isnull()
// always returns false.
#define NPY_INT_DECL(TYPE, CapType, T)            \
  template <>                                     \
  struct npy_traits<NPY_##TYPE> {                 \
    using value_type = T;                         \
    using TypeClass = CapType##Type;              \
    using BuilderClass = CapType##Builder;        \
                                                  \
    static constexpr bool supports_nulls = false; \
    static bool isnull(T) { return false; }       \
  };
60 
61 NPY_INT_DECL(INT8, Int8, int8_t);
62 NPY_INT_DECL(INT16, Int16, int16_t);
63 NPY_INT_DECL(INT32, Int32, int32_t);
64 NPY_INT_DECL(INT64, Int64, int64_t);
65 
66 NPY_INT_DECL(UINT8, UInt8, uint8_t);
67 NPY_INT_DECL(UINT16, UInt16, uint16_t);
68 NPY_INT_DECL(UINT32, UInt32, uint32_t);
69 NPY_INT_DECL(UINT64, UInt64, uint64_t);
70 
71 #if NPY_INT64 != NPY_LONGLONG
72 NPY_INT_DECL(LONGLONG, Int64, int64_t);
73 NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
74 #endif
75 
76 template <>
77 struct npy_traits<NPY_FLOAT16> {
78  typedef npy_half value_type;
79  using TypeClass = HalfFloatType;
80  using BuilderClass = HalfFloatBuilder;
81 
82  static constexpr bool supports_nulls = true;
83 
84  static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; }
85 };
86 
87 template <>
88 struct npy_traits<NPY_FLOAT32> {
89  typedef float value_type;
90  using TypeClass = FloatType;
91  using BuilderClass = FloatBuilder;
92 
93  static constexpr bool supports_nulls = true;
94 
95  static inline bool isnull(float v) { return v != v; }
96 };
97 
98 template <>
99 struct npy_traits<NPY_FLOAT64> {
100  typedef double value_type;
101  using TypeClass = DoubleType;
102  using BuilderClass = DoubleBuilder;
103 
104  static constexpr bool supports_nulls = true;
105 
106  static inline bool isnull(double v) { return v != v; }
107 };
108 
109 template <>
110 struct npy_traits<NPY_DATETIME> {
111  typedef int64_t value_type;
112  using TypeClass = TimestampType;
113  using BuilderClass = TimestampBuilder;
114 
115  static constexpr bool supports_nulls = true;
116 
117  static inline bool isnull(int64_t v) {
118  // NaT = -2**63
119  // = -0x8000000000000000
120  // = -9223372036854775808;
121  // = std::numeric_limits<int64_t>::min()
122  return v == std::numeric_limits<int64_t>::min();
123  }
124 };
125 
126 template <>
127 struct npy_traits<NPY_OBJECT> {
128  typedef PyObject* value_type;
129  static constexpr bool supports_nulls = true;
130 };
131 
/// Compile-time traits keyed by an Arrow type id (a Type::* enumerator).
///
/// The primary template carries no members; the explicit specializations
/// declared for individual type ids provide the actual traits.
template <int TYPE>
struct arrow_traits {
};
134 
135 template <>
136 struct arrow_traits<Type::BOOL> {
137  static constexpr int npy_type = NPY_BOOL;
138  static constexpr bool supports_nulls = false;
139 };
140 
// Stamps out the arrow_traits specialization for one Arrow integer type.
//   TYPE - shared suffix of the Arrow id (Type::TYPE) and the NumPy type
//          number (NPY_TYPE), e.g. INT8
// The specialization exposes the NumPy type number, the element type T taken
// from the matching npy_traits, and a double NaN as na_value. supports_nulls
// is false for every integer type.
#define INT_DECL(TYPE)                                       \
  template <>                                                \
  struct arrow_traits<Type::TYPE> {                          \
    using T = typename npy_traits<NPY_##TYPE>::value_type;   \
    static constexpr int npy_type = NPY_##TYPE;              \
    static constexpr bool supports_nulls = false;            \
    static constexpr double na_value = NAN;                  \
  };
149 
150 INT_DECL(INT8);
151 INT_DECL(INT16);
152 INT_DECL(INT32);
153 INT_DECL(INT64);
154 INT_DECL(UINT8);
155 INT_DECL(UINT16);
156 INT_DECL(UINT32);
157 INT_DECL(UINT64);
158 
159 template <>
160 struct arrow_traits<Type::HALF_FLOAT> {
161  static constexpr int npy_type = NPY_FLOAT16;
162  static constexpr bool supports_nulls = true;
163  static constexpr uint16_t na_value = NPY_HALF_NAN;
164  typedef typename npy_traits<NPY_FLOAT16>::value_type T;
165 };
166 
167 template <>
168 struct arrow_traits<Type::FLOAT> {
169  static constexpr int npy_type = NPY_FLOAT32;
170  static constexpr bool supports_nulls = true;
171  static constexpr float na_value = NAN;
172  typedef typename npy_traits<NPY_FLOAT32>::value_type T;
173 };
174 
175 template <>
176 struct arrow_traits<Type::DOUBLE> {
177  static constexpr int npy_type = NPY_FLOAT64;
178  static constexpr bool supports_nulls = true;
179  static constexpr double na_value = NAN;
180  typedef typename npy_traits<NPY_FLOAT64>::value_type T;
181 };
182 
// Sentinel used as na_value by the timestamp/date traits: the smallest int64_t,
// the same value npy_traits<NPY_DATETIME>::isnull() treats as NaT.
static constexpr int64_t kPandasTimestampNull = std::numeric_limits<int64_t>::min();

// 24 h * 60 min * 60 s * 10^9 ns = 86400000000000 ns in one day.
constexpr int64_t kNanosecondsInDay = 24LL * 60LL * 60LL * 1000000000LL;
186 
187 template <>
188 struct arrow_traits<Type::TIMESTAMP> {
189  static constexpr int npy_type = NPY_DATETIME;
190  static constexpr int64_t npy_shift = 1;
191 
192  static constexpr bool supports_nulls = true;
193  static constexpr int64_t na_value = kPandasTimestampNull;
194  typedef typename npy_traits<NPY_DATETIME>::value_type T;
195 };
196 
197 template <>
198 struct arrow_traits<Type::DATE32> {
199  // Data stores as FR_D day unit
200  static constexpr int npy_type = NPY_DATETIME;
201  static constexpr int64_t npy_shift = 1;
202 
203  static constexpr bool supports_nulls = true;
204  typedef typename npy_traits<NPY_DATETIME>::value_type T;
205 
206  static constexpr int64_t na_value = kPandasTimestampNull;
207  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
208 };
209 
210 template <>
211 struct arrow_traits<Type::DATE64> {
212  // Data stores as FR_D day unit
213  static constexpr int npy_type = NPY_DATETIME;
214 
215  // There are 1000 * 60 * 60 * 24 = 86400000ms in a day
216  static constexpr int64_t npy_shift = 86400000;
217 
218  static constexpr bool supports_nulls = true;
219  typedef typename npy_traits<NPY_DATETIME>::value_type T;
220 
221  static constexpr int64_t na_value = kPandasTimestampNull;
222  static inline bool isnull(int64_t v) { return npy_traits<NPY_DATETIME>::isnull(v); }
223 };
224 
225 template <>
226 struct arrow_traits<Type::TIME32> {
227  static constexpr int npy_type = NPY_OBJECT;
228  static constexpr bool supports_nulls = true;
229  static constexpr int64_t na_value = kPandasTimestampNull;
230  typedef typename npy_traits<NPY_DATETIME>::value_type T;
231 };
232 
233 template <>
234 struct arrow_traits<Type::TIME64> {
235  static constexpr int npy_type = NPY_OBJECT;
236  static constexpr bool supports_nulls = true;
237  typedef typename npy_traits<NPY_DATETIME>::value_type T;
238 };
239 
240 template <>
241 struct arrow_traits<Type::STRING> {
242  static constexpr int npy_type = NPY_OBJECT;
243  static constexpr bool supports_nulls = true;
244 };
245 
246 template <>
247 struct arrow_traits<Type::BINARY> {
248  static constexpr int npy_type = NPY_OBJECT;
249  static constexpr bool supports_nulls = true;
250 };
251 
252 static inline int NumPyTypeSize(int npy_type) {
253  switch (npy_type) {
254  case NPY_BOOL:
255  return 1;
256  case NPY_INT8:
257  return 1;
258  case NPY_INT16:
259  return 2;
260  case NPY_INT32:
261  return 4;
262  case NPY_INT64:
263  return 8;
264 #if (NPY_INT64 != NPY_LONGLONG)
265  case NPY_LONGLONG:
266  return 8;
267 #endif
268  case NPY_UINT8:
269  return 1;
270  case NPY_UINT16:
271  return 2;
272  case NPY_UINT32:
273  return 4;
274  case NPY_UINT64:
275  return 8;
276 #if (NPY_UINT64 != NPY_ULONGLONG)
277  case NPY_ULONGLONG:
278  return 8;
279 #endif
280  case NPY_FLOAT16:
281  return 2;
282  case NPY_FLOAT32:
283  return 4;
284  case NPY_FLOAT64:
285  return 8;
286  case NPY_DATETIME:
287  return 8;
288  case NPY_OBJECT:
289  return sizeof(void*);
290  default:
291  DCHECK(false) << "unhandled numpy type";
292  break;
293  }
294  return -1;
295 }
296 
297 } // namespace internal
298 } // namespace py
299 } // namespace arrow
NumericBuilder< FloatType > FloatBuilder
Definition: builder.h:311
#define NPY_INT_DECL(TYPE, CapType, T)
Definition: type_traits.h:50
NumericBuilder< HalfFloatType > HalfFloatBuilder
Definition: builder.h:310
#define INT_DECL(TYPE)
Definition: type_traits.h:141
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
#define DCHECK(condition)
Definition: logging.h:78
NumericBuilder< DoubleType > DoubleBuilder
Definition: builder.h:312
NumericBuilder< TimestampType > TimestampBuilder
Definition: builder.h:304