Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
type.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_TYPE_H
19 #define ARROW_TYPE_H
20 
21 #include <climits>
22 #include <cstdint>
23 #include <memory>
24 #include <ostream>
25 #include <string>
26 #include <unordered_map>
27 #include <vector>
28 
29 #include "arrow/status.h"
30 #include "arrow/type_fwd.h" // IWYU pragma: export
32 #include "arrow/util/macros.h"
33 #include "arrow/util/visibility.h"
34 #include "arrow/visitor.h"
35 
36 namespace arrow {
37 
// Main data type enumeration.
// NOTE(review): only the NA enumerator is visible in this listing; the
// remaining enumerators referenced elsewhere in this file (Type::BOOL,
// Type::UINT8, ...) are not shown here -- confirm against the real header.
44 struct Type {
45  enum type {
// A NULL type having no physical storage.
47  NA,
48 
51 
54 
57 
60 
63 
66 
69 
72 
75 
78 
81 
84 
87 
90 
93 
96 
99 
103 
107 
111 
114 
118 
121 
124 
127 
130 
133  };
134 };
135 
/// Kinds of buffer that a BufferDescr can describe.
enum class BufferType : char {
  DATA,
  OFFSET,
  TYPE,
  VALIDITY
};

/// Describes one buffer: its kind together with a bit width.
class BufferDescr {
 public:
  BufferDescr(BufferType type, int bit_width) : type_{type}, bit_width_{bit_width} {}

  /// Kind of buffer passed at construction.
  BufferType type() const { return type_; }

  /// Bit width passed at construction.
  int bit_width() const { return bit_width_; }

 private:
  BufferType type_;
  int bit_width_;
};
149 
150 class ARROW_EXPORT DataType {
151  public:
152  explicit DataType(Type::type id) : id_(id) {}
153  virtual ~DataType();
154 
155  // Return whether the types are equal
156  //
157  // Types that are logically convertable from one to another e.g. List<UInt8>
158  // and Binary are NOT equal).
159  virtual bool Equals(const DataType& other) const;
160  bool Equals(const std::shared_ptr<DataType>& other) const;
161 
162  std::shared_ptr<Field> child(int i) const { return children_[i]; }
163 
164  const std::vector<std::shared_ptr<Field>>& children() const { return children_; }
165 
166  int num_children() const { return static_cast<int>(children_.size()); }
167 
168  virtual Status Accept(TypeVisitor* visitor) const = 0;
169 
171  virtual std::string ToString() const = 0;
172 
177  virtual std::string name() const = 0;
178 
179  virtual std::vector<BufferDescr> GetBufferLayout() const = 0;
180 
181  Type::type id() const { return id_; }
182 
183  protected:
185  std::vector<std::shared_ptr<Field>> children_;
186 
187  private:
189 };
190 
191 inline std::ostream& operator<<(std::ostream& os, const DataType& type) {
192  os << type.ToString();
193  return os;
194 }
195 
196 // TODO(wesm): Remove this from parquet-cpp
197 using TypePtr = std::shared_ptr<DataType>;
198 
199 class ARROW_EXPORT FixedWidthType : public DataType {
200  public:
201  using DataType::DataType;
202 
203  virtual int bit_width() const = 0;
204 
205  std::vector<BufferDescr> GetBufferLayout() const override;
206 };
207 
208 class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
209  public:
210  using FixedWidthType::FixedWidthType;
211 };
212 
213 class ARROW_EXPORT Number : public PrimitiveCType {
214  public:
215  using PrimitiveCType::PrimitiveCType;
216 };
217 
218 class ARROW_EXPORT Integer : public Number {
219  public:
220  using Number::Number;
221  virtual bool is_signed() const = 0;
222 };
223 
224 class ARROW_EXPORT FloatingPoint : public Number {
225  public:
226  using Number::Number;
227  enum Precision { HALF, SINGLE, DOUBLE };
228  virtual Precision precision() const = 0;
229 };
230 
/// A superclass for types having additional metadata.
class ParametricType {};
234 
235 class ARROW_EXPORT NestedType : public DataType, public ParametricType {
236  public:
237  using DataType::DataType;
238 };
239 
/// Empty marker base mixed into NullType, BooleanType and BinaryType.
class NoExtraMeta {};
241 
242 // A field is a piece of metadata that includes (for now) a name and a data
243 // type
244 class ARROW_EXPORT Field {
245  public:
246  Field(const std::string& name, const std::shared_ptr<DataType>& type,
247  bool nullable = true,
248  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR)
249  : name_(name), type_(type), nullable_(nullable), metadata_(metadata) {}
250 
251  std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
252 
253 #ifndef ARROW_NO_DEPRECATED_API
254  Status AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
256  std::shared_ptr<Field>* out) const;
257 #endif
258 
259  std::shared_ptr<Field> AddMetadata(
260  const std::shared_ptr<const KeyValueMetadata>& metadata) const;
261  std::shared_ptr<Field> RemoveMetadata() const;
262 
263  bool Equals(const Field& other) const;
264  bool Equals(const std::shared_ptr<Field>& other) const;
265 
266  std::string ToString() const;
267 
268  const std::string& name() const { return name_; }
269  std::shared_ptr<DataType> type() const { return type_; }
270  bool nullable() const { return nullable_; }
271 
272  private:
273  // Field name
274  std::string name_;
275 
276  // The field's data type
277  std::shared_ptr<DataType> type_;
278 
279  // Fields can be nullable
280  bool nullable_;
281 
282  // The field's metadata, if any
283  std::shared_ptr<const KeyValueMetadata> metadata_;
284 };
285 
286 namespace detail {
287 
288 template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
289 class ARROW_EXPORT CTypeImpl : public BASE {
290  public:
291  using c_type = C_TYPE;
292  static constexpr Type::type type_id = TYPE_ID;
293 
294  CTypeImpl() : BASE(TYPE_ID) {}
295 
296  int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }
297 
298  Status Accept(TypeVisitor* visitor) const override {
299  return visitor->Visit(*static_cast<const DERIVED*>(this));
300  }
301 
302  std::string ToString() const override { return this->name(); }
303 };
304 
305 template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
306 class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, Integer, TYPE_ID, C_TYPE> {
307  bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
308 };
309 
310 } // namespace detail
311 
312 class ARROW_EXPORT NullType : public DataType, public NoExtraMeta {
313  public:
314  static constexpr Type::type type_id = Type::NA;
315 
317 
318  Status Accept(TypeVisitor* visitor) const override;
319  std::string ToString() const override;
320 
321  std::string name() const override { return "null"; }
322 
323  std::vector<BufferDescr> GetBufferLayout() const override;
324 };
325 
326 class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta {
327  public:
328  static constexpr Type::type type_id = Type::BOOL;
329 
331 
332  Status Accept(TypeVisitor* visitor) const override;
333  std::string ToString() const override;
334 
335  int bit_width() const override { return 1; }
336  std::string name() const override { return "bool"; }
337 };
338 
339 class ARROW_EXPORT UInt8Type
340  : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
341  public:
342  std::string name() const override { return "uint8"; }
343 };
344 
345 class ARROW_EXPORT Int8Type
346  : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
347  public:
348  std::string name() const override { return "int8"; }
349 };
350 
351 class ARROW_EXPORT UInt16Type
352  : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
353  public:
354  std::string name() const override { return "uint16"; }
355 };
356 
357 class ARROW_EXPORT Int16Type
358  : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
359  public:
360  std::string name() const override { return "int16"; }
361 };
362 
363 class ARROW_EXPORT UInt32Type
364  : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
365  public:
366  std::string name() const override { return "uint32"; }
367 };
368 
369 class ARROW_EXPORT Int32Type
370  : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
371  public:
372  std::string name() const override { return "int32"; }
373 };
374 
375 class ARROW_EXPORT UInt64Type
376  : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
377  public:
378  std::string name() const override { return "uint64"; }
379 };
380 
381 class ARROW_EXPORT Int64Type
382  : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
383  public:
384  std::string name() const override { return "int64"; }
385 };
386 
387 class ARROW_EXPORT HalfFloatType
388  : public detail::CTypeImpl<HalfFloatType, FloatingPoint, Type::HALF_FLOAT, uint16_t> {
389  public:
390  Precision precision() const override;
391  std::string name() const override { return "halffloat"; }
392 };
393 
394 class ARROW_EXPORT FloatType
395  : public detail::CTypeImpl<FloatType, FloatingPoint, Type::FLOAT, float> {
396  public:
397  Precision precision() const override;
398  std::string name() const override { return "float"; }
399 };
400 
401 class ARROW_EXPORT DoubleType
402  : public detail::CTypeImpl<DoubleType, FloatingPoint, Type::DOUBLE, double> {
403  public:
404  Precision precision() const override;
405  std::string name() const override { return "double"; }
406 };
407 
408 class ARROW_EXPORT ListType : public NestedType {
409  public:
410  static constexpr Type::type type_id = Type::LIST;
411 
412  // List can contain any other logical value type
413  explicit ListType(const std::shared_ptr<DataType>& value_type)
414  : ListType(std::make_shared<Field>("item", value_type)) {}
415 
416  explicit ListType(const std::shared_ptr<Field>& value_field) : NestedType(Type::LIST) {
417  children_ = {value_field};
418  }
419 
420  std::shared_ptr<Field> value_field() const { return children_[0]; }
421 
422  std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }
423 
424  Status Accept(TypeVisitor* visitor) const override;
425  std::string ToString() const override;
426 
427  std::string name() const override { return "list"; }
428 
429  std::vector<BufferDescr> GetBufferLayout() const override;
430 };
431 
432 // BinaryType type is represents lists of 1-byte values.
433 class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
434  public:
435  static constexpr Type::type type_id = Type::BINARY;
436 
438 
439  Status Accept(TypeVisitor* visitor) const override;
440  std::string ToString() const override;
441  std::string name() const override { return "binary"; }
442 
443  std::vector<BufferDescr> GetBufferLayout() const override;
444 
445  protected:
446  // Allow subclasses to change the logical type.
447  explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
448 };
449 
450 // BinaryType type is represents lists of 1-byte values.
451 class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
452  public:
453  static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
454 
455  explicit FixedSizeBinaryType(int32_t byte_width)
456  : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
457  explicit FixedSizeBinaryType(int32_t byte_width, Type::type type_id)
458  : FixedWidthType(type_id), byte_width_(byte_width) {}
459 
460  Status Accept(TypeVisitor* visitor) const override;
461  std::string ToString() const override;
462  std::string name() const override { return "fixed_size_binary"; }
463 
464  std::vector<BufferDescr> GetBufferLayout() const override;
465 
466  int32_t byte_width() const { return byte_width_; }
467  int bit_width() const override;
468 
469  protected:
470  int32_t byte_width_;
471 };
472 
473 // UTF-8 encoded strings
474 class ARROW_EXPORT StringType : public BinaryType {
475  public:
476  static constexpr Type::type type_id = Type::STRING;
477 
479 
480  Status Accept(TypeVisitor* visitor) const override;
481  std::string ToString() const override;
482  std::string name() const override { return "utf8"; }
483 };
484 
485 class ARROW_EXPORT StructType : public NestedType {
486  public:
487  static constexpr Type::type type_id = Type::STRUCT;
488 
489  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
490  : NestedType(Type::STRUCT) {
491  children_ = fields;
492  }
493 
494  Status Accept(TypeVisitor* visitor) const override;
495  std::string ToString() const override;
496  std::string name() const override { return "struct"; }
497 
498  std::vector<BufferDescr> GetBufferLayout() const override;
499 };
500 
501 class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
502  public:
503  static constexpr Type::type type_id = Type::DECIMAL;
504 
505  explicit DecimalType(int32_t precision, int32_t scale)
506  : FixedSizeBinaryType(16, Type::DECIMAL), precision_(precision), scale_(scale) {}
507 
508  Status Accept(TypeVisitor* visitor) const override;
509  std::string ToString() const override;
510  std::string name() const override { return "decimal"; }
511 
512  int32_t precision() const { return precision_; }
513  int32_t scale() const { return scale_; }
514 
515  private:
516  int32_t precision_;
517  int32_t scale_;
518 };
519 
/// Mode carried by a UnionType.
enum class UnionMode : char {
  SPARSE,
  DENSE
};
521 
522 class ARROW_EXPORT UnionType : public NestedType {
523  public:
524  static constexpr Type::type type_id = Type::UNION;
525 
526  UnionType(const std::vector<std::shared_ptr<Field>>& fields,
527  const std::vector<uint8_t>& type_codes, UnionMode mode = UnionMode::SPARSE);
528 
529  std::string ToString() const override;
530  std::string name() const override { return "union"; }
531  Status Accept(TypeVisitor* visitor) const override;
532 
533  std::vector<BufferDescr> GetBufferLayout() const override;
534 
535  const std::vector<uint8_t>& type_codes() const { return type_codes_; }
536 
537  UnionMode mode() const { return mode_; }
538 
539  private:
540  UnionMode mode_;
541 
542  // The type id used in the data to indicate each data type in the union. For
543  // example, the first type in the union might be denoted by the id 5 (instead
544  // of 0).
545  std::vector<uint8_t> type_codes_;
546 };
547 
548 // ----------------------------------------------------------------------
549 // Date and time types
550 
/// Unit reported by DateType::unit().
enum class DateUnit : char {
  DAY = 0,
  MILLI = 1
};
552 
553 class ARROW_EXPORT DateType : public FixedWidthType {
554  public:
555  DateUnit unit() const { return unit_; }
556 
557  protected:
558  DateType(Type::type type_id, DateUnit unit);
560 };
561 
563 class ARROW_EXPORT Date32Type : public DateType {
564  public:
565  static constexpr Type::type type_id = Type::DATE32;
566 
567  using c_type = int32_t;
568 
569  Date32Type();
570 
571  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
572 
573  Status Accept(TypeVisitor* visitor) const override;
574  std::string ToString() const override;
575 
576  std::string name() const override { return "date32"; }
577 };
578 
580 class ARROW_EXPORT Date64Type : public DateType {
581  public:
582  static constexpr Type::type type_id = Type::DATE64;
583 
584  using c_type = int64_t;
585 
586  Date64Type();
587 
588  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
589 
590  Status Accept(TypeVisitor* visitor) const override;
591  std::string ToString() const override;
592 
593  std::string name() const override { return "date64"; }
594 };
595 
/// Scope for the time-unit enumeration used by the time and timestamp types.
struct TimeUnit {
  enum type {
    SECOND = 0,
    MILLI = 1,
    MICRO = 2,
    NANO = 3
  };
};

/// Write the abbreviation of a time unit ("s", "ms", "us" or "ns").
/// A value outside the four enumerators writes nothing.
static inline std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
  const char* abbrev = nullptr;
  switch (unit) {
    case TimeUnit::SECOND:
      abbrev = "s";
      break;
    case TimeUnit::MILLI:
      abbrev = "ms";
      break;
    case TimeUnit::MICRO:
      abbrev = "us";
      break;
    case TimeUnit::NANO:
      abbrev = "ns";
      break;
  }
  if (abbrev != nullptr) {
    os << abbrev;
  }
  return os;
}
617 
618 class ARROW_EXPORT TimeType : public FixedWidthType, public ParametricType {
619  public:
620  TimeUnit::type unit() const { return unit_; }
621 
622  protected:
623  TimeType(Type::type type_id, TimeUnit::type unit);
625 };
626 
627 class ARROW_EXPORT Time32Type : public TimeType {
628  public:
629  static constexpr Type::type type_id = Type::TIME32;
630  using c_type = int32_t;
631 
632  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
633 
634  explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
635 
636  Status Accept(TypeVisitor* visitor) const override;
637  std::string ToString() const override;
638 
639  std::string name() const override { return "time32"; }
640 };
641 
642 class ARROW_EXPORT Time64Type : public TimeType {
643  public:
644  static constexpr Type::type type_id = Type::TIME64;
645  using c_type = int64_t;
646 
647  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
648 
649  explicit Time64Type(TimeUnit::type unit = TimeUnit::MILLI);
650 
651  Status Accept(TypeVisitor* visitor) const override;
652  std::string ToString() const override;
653 
654  std::string name() const override { return "time64"; }
655 };
656 
657 class ARROW_EXPORT TimestampType : public FixedWidthType, public ParametricType {
658  public:
659  using Unit = TimeUnit;
660 
661  typedef int64_t c_type;
662  static constexpr Type::type type_id = Type::TIMESTAMP;
663 
664  int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
665 
667  : FixedWidthType(Type::TIMESTAMP), unit_(unit) {}
668 
669  explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
670  : FixedWidthType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
671 
672  Status Accept(TypeVisitor* visitor) const override;
673  std::string ToString() const override;
674  std::string name() const override { return "timestamp"; }
675 
676  TimeUnit::type unit() const { return unit_; }
677  const std::string& timezone() const { return timezone_; }
678 
679  private:
680  TimeUnit::type unit_;
681  std::string timezone_;
682 };
683 
684 class ARROW_EXPORT IntervalType : public FixedWidthType {
685  public:
686  enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 };
687 
688  using c_type = int64_t;
689  static constexpr Type::type type_id = Type::INTERVAL;
690 
691  int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
692 
693  explicit IntervalType(Unit unit = Unit::YEAR_MONTH)
694  : FixedWidthType(Type::INTERVAL), unit_(unit) {}
695 
696  Status Accept(TypeVisitor* visitor) const override;
697  std::string ToString() const override { return name(); }
698  std::string name() const override { return "date"; }
699 
700  Unit unit() const { return unit_; }
701 
702  private:
703  Unit unit_;
704 };
705 
706 // ----------------------------------------------------------------------
707 // DictionaryType (for categorical or dictionary-encoded data)
708 
709 class ARROW_EXPORT DictionaryType : public FixedWidthType {
710  public:
711  static constexpr Type::type type_id = Type::DICTIONARY;
712 
713  DictionaryType(const std::shared_ptr<DataType>& index_type,
714  const std::shared_ptr<Array>& dictionary, bool ordered = false);
715 
716  int bit_width() const override;
717 
718  std::shared_ptr<DataType> index_type() const { return index_type_; }
719 
720  std::shared_ptr<Array> dictionary() const;
721 
722  Status Accept(TypeVisitor* visitor) const override;
723  std::string ToString() const override;
724  std::string name() const override { return "dictionary"; }
725 
726  bool ordered() const { return ordered_; }
727 
728  private:
729  // Must be an integer type (not currently checked)
730  std::shared_ptr<DataType> index_type_;
731  std::shared_ptr<Array> dictionary_;
732  bool ordered_;
733 };
734 
735 // ----------------------------------------------------------------------
736 // Schema
737 
741 class ARROW_EXPORT Schema {
742  public:
743  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields,
744  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
745 
746  explicit Schema(std::vector<std::shared_ptr<Field>>&& fields,
747  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
748 
749  virtual ~Schema() = default;
750 
752  bool Equals(const Schema& other) const;
753 
755  std::shared_ptr<Field> field(int i) const { return fields_[i]; }
756 
758  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
759 
761  int64_t GetFieldIndex(const std::string& name) const;
762 
763  const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
764 
768  std::shared_ptr<const KeyValueMetadata> metadata() const;
769 
771  std::string ToString() const;
772 
773  Status AddField(int i, const std::shared_ptr<Field>& field,
774  std::shared_ptr<Schema>* out) const;
775  Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
776 
777 #ifndef ARROW_NO_DEPRECATED_API
778  Status AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
780  std::shared_ptr<Schema>* out) const;
781 #endif
782 
787  std::shared_ptr<Schema> AddMetadata(
788  const std::shared_ptr<const KeyValueMetadata>& metadata) const;
789 
791  std::shared_ptr<Schema> RemoveMetadata() const;
792 
794  int num_fields() const { return static_cast<int>(fields_.size()); }
795 
796  private:
797  std::vector<std::shared_ptr<Field>> fields_;
798  mutable std::unordered_map<std::string, int> name_to_index_;
799 
800  std::shared_ptr<const KeyValueMetadata> metadata_;
801 };
802 
803 // ----------------------------------------------------------------------
804 // Factory functions
805 
807 ARROW_EXPORT
808 std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
809 
811 ARROW_EXPORT
812 std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
813 
815 ARROW_EXPORT
816 std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_type);
817 
819 ARROW_EXPORT
820 std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type);
821 
823 ARROW_EXPORT
824 std::shared_ptr<DataType> timestamp(TimeUnit::type unit);
825 
827 ARROW_EXPORT
828 std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone);
829 
832 std::shared_ptr<DataType> ARROW_EXPORT time32(TimeUnit::type unit);
833 
836 std::shared_ptr<DataType> ARROW_EXPORT time64(TimeUnit::type unit);
837 
839 std::shared_ptr<DataType> ARROW_EXPORT
840 struct_(const std::vector<std::shared_ptr<Field>>& fields);
841 
843 std::shared_ptr<DataType> ARROW_EXPORT
844 union_(const std::vector<std::shared_ptr<Field>>& child_fields,
845  const std::vector<uint8_t>& type_codes, UnionMode mode = UnionMode::SPARSE);
846 
848 std::shared_ptr<DataType> ARROW_EXPORT
849 dictionary(const std::shared_ptr<DataType>& index_type,
850  const std::shared_ptr<Array>& values, bool ordered = false);
851 
858 std::shared_ptr<Field> ARROW_EXPORT field(
859  const std::string& name, const std::shared_ptr<DataType>& type, bool nullable = true,
860  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
861 
867 ARROW_EXPORT
868 std::shared_ptr<Schema> schema(
869  const std::vector<std::shared_ptr<Field>>& fields,
870  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
871 
877 ARROW_EXPORT
878 std::shared_ptr<Schema> schema(
879  std::vector<std::shared_ptr<Field>>&& fields,
880  const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
881 
882 } // namespace arrow
883 
884 #endif // ARROW_TYPE_H
Definition: type.h:485
int bit_width() const override
Definition: type.h:647
std::string ToString() const override
A string representation of the type, including any children.
Definition: type.h:697
std::shared_ptr< const KeyValueMetadata > metadata() const
Definition: type.h:251
Definition: type.h:596
A NULL type having no physical storage.
Definition: type.h:47
int bit_width() const override
Definition: type.h:335
const std::string & name() const
Definition: type.h:268
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:576
Definition: type.h:224
Definition: type.h:387
int32_t byte_width_
Definition: type.h:470
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:398
DateUnit unit() const
Definition: type.h:555
int32_t precision() const
Definition: type.h:512
Definition: type.h:339
A superclass for types having additional metadata.
Definition: type.h:233
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:372
type
Definition: type.h:45
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:674
Unsigned 16-bit little-endian integer.
Definition: type.h:59
Definition: type.h:213
IntervalType(Unit unit=Unit::YEAR_MONTH)
Definition: type.h:693
BinaryType()
Definition: type.h:437
Main data type enumeration.
Definition: type.h:44
Definition: type.h:401
const std::vector< std::shared_ptr< Field > > & fields() const
Definition: type.h:763
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:427
std::shared_ptr< Field > child(int i) const
Definition: type.h:162
Unsigned 32-bit little-endian integer.
Definition: type.h:65
int32_t c_type
Definition: type.h:567
std::shared_ptr< DataType > index_type() const
Definition: type.h:718
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:593
Unsigned 8-bit little-endian integer.
Definition: type.h:53
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:378
int64_t c_type
Definition: type.h:584
BufferType
Definition: type.h:136
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:366
Definition: type.h:597
Signed 8-bit little-endian integer.
Definition: type.h:56
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:321
NullType()
Definition: type.h:316
Date as int64_t milliseconds since UNIX epoch.
Definition: type.h:580
std::shared_ptr< DataType > list(const std::shared_ptr< Field > &value_type)
Make an instance of ListType.
A list of some logical data type.
Definition: type.h:120
#define NULLPTR
Definition: macros.h:69
std::shared_ptr< DataType > value_type() const
Definition: type.h:422
DateUnit
Definition: type.h:551
Dictionary aka Category type.
Definition: type.h:129
Time as signed 64-bit integer, representing either microseconds or nanoseconds since midnight...
Definition: type.h:110
int bit_width() const
Definition: type.h:143
virtual std::string ToString() const =0
A string representation of the type, including any children.
TimeUnit::type unit() const
Definition: type.h:676
Signed 32-bit little-endian integer.
Definition: type.h:68
Map, a repeated struct logical type.
Definition: type.h:132
Type::type id() const
Definition: type.h:181
DataType(Type::type id)
Definition: type.h:152
Time as signed 32-bit integer, representing either seconds or milliseconds since midnight.
Definition: type.h:106
std::shared_ptr< DataType > time32(TimeUnit::type unit)
Create an instance of 32-bit time type Unit can be either SECOND or MILLI.
Definition: status.h:106
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:336
type
Definition: type.h:597
Type::type id_
Definition: type.h:184
bool nullable() const
Definition: type.h:270
int32_t days since the UNIX epoch
Definition: type.h:95
std::shared_ptr< DataType > decimal(int32_t precision, int32_t scale)
Make an instance of DecimalType.
std::shared_ptr< DataType > dictionary(const std::shared_ptr< DataType > &index_type, const std::shared_ptr< Array > &values, bool ordered=false)
Create an instance of Dictionary type.
int64_t c_type
Definition: type.h:645
std::shared_ptr< DataType > fixed_size_binary(int32_t byte_width)
Make an instance of FixedSizeBinaryType.
int64_t c_type
Definition: type.h:688
Definition: type.h:597
ListType(const std::shared_ptr< Field > &value_field)
Definition: type.h:416
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:462
4-byte floating point value
Definition: type.h:80
Unit unit() const
Definition: type.h:700
Signed 16-bit little-endian integer.
Definition: type.h:62
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:724
const std::vector< uint8_t > & type_codes() const
Definition: type.h:535
DateUnit unit_
Definition: type.h:559
Definition: type.h:351
Definition: type.h:657
Definition: type.h:199
Boolean as 1 bit, LSB bit-packed ordering.
Definition: type.h:50
Signed 64-bit little-endian integer.
Definition: type.h:74
Definition: type.h:363
Struct of logical types.
Definition: type.h:123
Variable-length bytes (no guarantee of UTF8-ness)
Definition: type.h:89
Definition: type.h:312
int bit_width() const override
Definition: type.h:632
int bit_width() const override
Definition: type.h:588
Definition: type.h:433
std::shared_ptr< Field > field(const std::string &name, const std::shared_ptr< DataType > &type, bool nullable=true, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Create a Field instance.
Definition: visitor.h:60
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:482
std::shared_ptr< DataType > type() const
Definition: type.h:269
Definition: type.h:326
TimestampType(TimeUnit::type unit, const std::string &timezone)
Definition: type.h:669
ListType(const std::shared_ptr< DataType > &value_type)
Definition: type.h:413
Definition: type.h:684
Definition: type.h:408
StringType()
Definition: type.h:478
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:639
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:354
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:342
DecimalType(int32_t precision, int32_t scale)
Definition: type.h:505
Definition: type.h:597
Exact timestamp encoded with int64 since UNIX epoch Default unit millisecond.
Definition: type.h:102
Definition: type.h:474
Unit
Definition: type.h:686
int bit_width() const override
Definition: type.h:691
Fixed-size binary. Each value occupies the same number of bytes.
Definition: type.h:92
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:441
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:510
std::vector< std::shared_ptr< Field > > children_
Definition: type.h:185
Definition: type.h:501
Definition: type.h:218
TimeUnit::type unit() const
Definition: type.h:620
BufferDescr(BufferType type, int bit_width)
Definition: type.h:140
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:405
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:698
TimestampType(TimeUnit::type unit=TimeUnit::MILLI)
Definition: type.h:666
std::shared_ptr< Field > value_field() const
Definition: type.h:420
const std::vector< std::shared_ptr< Field > > & children() const
Definition: type.h:164
Definition: type.h:235
Sequence of arrow::Field objects describing the columns of a record batch or table data structure...
Definition: type.h:741
Definition: type.h:597
std::ostream & operator<<(std::ostream &os, const DataType &type)
Definition: type.h:191
Definition: type.h:244
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
Definition: type.h:618
FixedSizeBinaryType(int32_t byte_width, Type::type type_id)
Definition: type.h:457
Definition: type.h:394
FixedSizeBinaryType(int32_t byte_width)
Definition: type.h:455
Definition: type.h:357
int bit_width() const override
Definition: type.h:664
8-byte floating point value
Definition: type.h:83
int32_t scale() const
Definition: type.h:513
Definition: type.h:369
Date as int32_t days since UNIX epoch.
Definition: type.h:563
Definition: type.h:553
int32_t byte_width() const
Definition: type.h:466
int64_t milliseconds since the UNIX epoch
Definition: type.h:98
Definition: type.h:227
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:391
virtual Status Visit(const NullType &type)
int32_t c_type
Definition: type.h:630
std::shared_ptr< DataType > time64(TimeUnit::type unit)
Create an instance of 64-bit time type. Unit can be either MICRO or NANO.
UTF8 variable-length string as List<Char>
Definition: type.h:86
Precision
Definition: type.h:227
Unsigned 64-bit little-endian integer.
Definition: type.h:71
std::shared_ptr< DataType > struct_(const std::vector< std::shared_ptr< Field >> &fields)
Create an instance of Struct type.
std::shared_ptr< Schema > schema(const std::vector< std::shared_ptr< Field >> &fields, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Create a Schema instance.
Definition: type.h:240
BooleanType()
Definition: type.h:330
Field(const std::string &name, const std::shared_ptr< DataType > &type, bool nullable=true, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Definition: type.h:246
Definition: type.h:709
int bit_width() const override
Definition: type.h:571
UnionMode mode() const
Definition: type.h:537
Definition: type.h:642
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:530
Definition: type.h:375
std::shared_ptr< Field > field(int i) const
Return the i-th schema element. Does not perform bounds checking.
Definition: type.h:755
Definition: type.h:138
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:496
int num_children() const
Definition: type.h:166
Definition: type.h:345
int64_t c_type
Definition: type.h:661
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:654
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:360
2-byte floating point value
Definition: type.h:77
Definition: type.h:627
Unions of logical types.
Definition: type.h:126
Definition: type.h:150
std::shared_ptr< DataType > TypePtr
Definition: type.h:197
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:348
std::shared_ptr< DataType > union_(const std::vector< std::shared_ptr< Field >> &child_fields, const std::vector< uint8_t > &type_codes, UnionMode mode=UnionMode::SPARSE)
Create an instance of Union type.
bool ordered() const
Definition: type.h:726
Definition: type.h:522
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
YEAR_MONTH or DAY_TIME interval in SQL style.
Definition: type.h:113
UnionMode
Definition: type.h:520
const std::string & timezone() const
Definition: type.h:677
std::string name() const override
A string name of the type, omitting any child fields.
Definition: type.h:384
Precision- and scale-based decimal type.
Definition: type.h:117
BinaryType(Type::type logical_type)
Definition: type.h:447
BufferType type() const
Definition: type.h:142
Definition: type.h:208
Definition: type.h:381
std::shared_ptr< DataType > timestamp(TimeUnit::type unit)
Make an instance of TimestampType.
TimeUnit::type unit_
Definition: type.h:624
Definition: type.h:451
StructType(const std::vector< std::shared_ptr< Field >> &fields)
Definition: type.h:489
int num_fields() const
Return the number of fields (columns) in the schema.
Definition: type.h:794