arrow_schema/datatype.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt;
19use std::str::FromStr;
20use std::sync::Arc;
21
22use crate::{ArrowError, Field, FieldRef, Fields, UnionFields};
23
/// Datatypes supported by this implementation of Apache Arrow.
///
/// The variants of this enum include primitive fixed size types as well as
/// parametric or nested types. See [`Schema.fbs`] for Arrow's specification.
///
/// # Examples
///
/// Primitive types
/// ```
/// # use arrow_schema::DataType;
/// // create a new 32-bit signed integer
/// let data_type = DataType::Int32;
/// ```
///
/// Nested Types
/// ```
/// # use arrow_schema::{DataType, Field};
/// # use std::sync::Arc;
/// // create a new list of 32-bit signed integers directly
/// let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
/// // Create the same list type with constructor
/// let list_data_type2 = DataType::new_list(DataType::Int32, true);
/// assert_eq!(list_data_type, list_data_type2);
/// ```
///
/// Dictionary Types
/// ```
/// # use arrow_schema::{DataType};
/// // String Dictionary (key type Int32 and value type Utf8)
/// let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
/// ```
///
/// Timestamp Types
/// ```
/// # use arrow_schema::{DataType, TimeUnit};
/// // timestamp with millisecond precision without timezone specified
/// let data_type = DataType::Timestamp(TimeUnit::Millisecond, None);
/// // timestamp with nanosecond precision in UTC timezone
/// let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));
/// ```
///
/// # Display and FromStr
///
/// The `Display` and `FromStr` implementations for `DataType` are
/// human-readable, parseable, and reversible.
///
/// ```
/// # use arrow_schema::DataType;
/// let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
/// let data_type_string = data_type.to_string();
/// assert_eq!(data_type_string, "Dictionary(Int32, Utf8)");
/// // display can be parsed back into the original type
/// let parsed_data_type: DataType = data_type.to_string().parse().unwrap();
/// assert_eq!(data_type, parsed_data_type);
/// ```
///
/// # Nested Support
/// Currently, the Rust implementation supports the following nested types:
///  - `List<T>`
///  - `LargeList<T>`
///  - `FixedSizeList<T>`
///  - `Struct<T, U, V, ...>`
///  - `Union<T, U, V, ...>`
///  - `Map<K, V>`
///
/// Nested types can themselves be nested within other arrays.
/// For more information on these types please see
/// [the physical memory layout of Apache Arrow]
///
/// [`Schema.fbs`]: https://github.com/apache/arrow/blob/main/format/Schema.fbs
/// [the physical memory layout of Apache Arrow]: https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DataType {
    /// Null type
    Null,
    /// A boolean datatype representing the values `true` and `false`.
    Boolean,
    /// A signed 8-bit integer.
    Int8,
    /// A signed 16-bit integer.
    Int16,
    /// A signed 32-bit integer.
    Int32,
    /// A signed 64-bit integer.
    Int64,
    /// An unsigned 8-bit integer.
    UInt8,
    /// An unsigned 16-bit integer.
    UInt16,
    /// An unsigned 32-bit integer.
    UInt32,
    /// An unsigned 64-bit integer.
    UInt64,
    /// A 16-bit floating point number.
    Float16,
    /// A 32-bit floating point number.
    Float32,
    /// A 64-bit floating point number.
    Float64,
    /// A timestamp with an optional timezone.
    ///
    /// Time is measured as a Unix epoch, counting the seconds from
    /// 00:00:00.000 on 1 January 1970, excluding leap seconds,
    /// as a signed 64-bit integer.
    ///
    /// The time zone is a string indicating the name of a time zone, one of:
    ///
    /// * As used in the Olson time zone database (the "tz database" or
    ///   "tzdata"), such as "America/New_York"
    /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
    ///
    /// Timestamps with a non-empty timezone
    /// ------------------------------------
    ///
    /// If a Timestamp column has a non-empty timezone value, its epoch is
    /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
    /// (the Unix epoch), regardless of the Timestamp's own timezone.
    ///
    /// Therefore, timestamp values with a non-empty timezone correspond to
    /// physical points in time together with some additional information about
    /// how the data was obtained and/or how to display it (the timezone).
    ///
    /// For example, the timestamp value 0 with the timezone string "Europe/Paris"
    /// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
    /// application may prefer to display it as "January 1st 1970, 01h00" in
    /// the Europe/Paris timezone (which is the same physical point in time).
    ///
    /// One consequence is that timestamp values with a non-empty timezone
    /// can be compared and ordered directly, since they all share the same
    /// well-known point of reference (the Unix epoch).
    ///
    /// Timestamps with an unset / empty timezone
    /// -----------------------------------------
    ///
    /// If a Timestamp column has no timezone value, its epoch is
    /// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
    ///
    /// Therefore, timestamp values without a timezone cannot be meaningfully
    /// interpreted as physical points in time, but only as calendar / clock
    /// indications ("wall clock time") in an unspecified timezone.
    ///
    /// For example, the timestamp value 0 with an empty timezone string
    /// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
    /// is not enough information to interpret it as a well-defined physical
    /// point in time.
    ///
    /// One consequence is that timestamp values without a timezone cannot
    /// be reliably compared or ordered, since they may have different points of
    /// reference.  In particular, it is *not* possible to interpret an unset
    /// or empty timezone as the same as "UTC".
    ///
    /// Conversion between timezones
    /// ----------------------------
    ///
    /// If a Timestamp column has a non-empty timezone, changing the timezone
    /// to a different non-empty value is a metadata-only operation:
    /// the timestamp values need not change as their point of reference remains
    /// the same (the Unix epoch).
    ///
    /// However, if a Timestamp column has no timezone value, changing it to a
    /// non-empty value requires thinking about the desired semantics.
    /// One possibility is to assume that the original timestamp values are
    /// relative to the epoch of the timezone being set; timestamp values should
    /// then be adjusted to the Unix epoch (for example, changing the timezone from
    /// empty to "Europe/Paris" would require converting the timestamp values
    /// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
    /// nevertheless correct).
    ///
    /// ```
    /// # use arrow_schema::{DataType, TimeUnit};
    /// DataType::Timestamp(TimeUnit::Second, None);
    /// DataType::Timestamp(TimeUnit::Second, Some("literal".into()));
    /// DataType::Timestamp(TimeUnit::Second, Some("string".to_string().into()));
    /// ```
    ///
    /// Timezone representation
    /// -----------------------
    /// It is possible to use either the timezone string representation, such as "UTC", or the absolute time zone offset "+00:00".
    /// For timezones with fixed offsets, such as "UTC" or "JST", the offset representation is recommended, as it is more explicit and less ambiguous.
    ///
    /// Most arrow-rs functionalities use the absolute offset representation,
    /// such as [`PrimitiveArray::with_timezone_utc`] that applies a
    /// UTC timezone to timestamp arrays.
    ///
    /// [`PrimitiveArray::with_timezone_utc`]: https://docs.rs/arrow/latest/arrow/array/struct.PrimitiveArray.html#method.with_timezone_utc
    ///
    /// Timezone string parsing
    /// -----------------------
    /// When feature `chrono-tz` is not enabled, allowed timezone strings are fixed offsets of the form "+09:00", "-09" or "+0930".
    ///
    /// When feature `chrono-tz` is enabled, additional strings supported by [chrono_tz](https://docs.rs/chrono-tz/latest/chrono_tz/)
    /// are also allowed, which include [IANA database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
    /// timezones.
    Timestamp(TimeUnit, Option<Arc<str>>),
    /// A signed 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days.
    Date32,
    /// A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in milliseconds.
    ///
    /// # Valid Ranges
    ///
    /// According to the Arrow specification ([Schema.fbs]), values of Date64
    /// are treated as the number of *days*, in milliseconds, since the UNIX
    /// epoch. Therefore, values of this type must be evenly divisible by
    /// `86_400_000`, the number of milliseconds in a standard day.
    ///
    /// It is not valid to store milliseconds that do not represent an exact
    /// day. The reason for this restriction is compatibility with other
    /// language's native libraries (specifically Java), which historically
    /// lacked a dedicated date type and only supported timestamps.
    ///
    /// # Validation
    ///
    /// This library does not validate or enforce that Date64 values are evenly
    /// divisible by `86_400_000` for performance and usability reasons. Date64
    /// values are treated similarly to `Timestamp(TimeUnit::Millisecond,
    /// None)`: values will be displayed with a time of day if the value does
    /// not represent an exact day, and arithmetic will be done at the
    /// millisecond granularity.
    ///
    /// # Recommendation
    ///
    /// Users should prefer [`Date32`] to cleanly represent the number
    /// of days, or one of the Timestamp variants to include time as part of the
    /// representation, depending on their use case.
    ///
    /// # Further Reading
    ///
    /// For more details, see [#5288](https://github.com/apache/arrow-rs/issues/5288).
    ///
    /// [`Date32`]: Self::Date32
    /// [Schema.fbs]: https://github.com/apache/arrow/blob/main/format/Schema.fbs
    Date64,
    /// A signed 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`.
    /// Must be either seconds or milliseconds.
    Time32(TimeUnit),
    /// A signed 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`.
    /// Must be either microseconds or nanoseconds.
    Time64(TimeUnit),
    /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds.
    Duration(TimeUnit),
    /// A "calendar" interval which models types that don't necessarily
    /// have a precise duration without the context of a base timestamp (e.g.
    /// days can differ in length during day light savings time transitions).
    Interval(IntervalUnit),
    /// Opaque binary data of variable length.
    ///
    /// A single Binary array can store up to [`i32::MAX`] bytes
    /// of binary data in total.
    Binary,
    /// Opaque binary data of fixed size.
    /// Enum parameter specifies the number of bytes per value.
    FixedSizeBinary(i32),
    /// Opaque binary data of variable length and 64-bit offsets.
    ///
    /// A single LargeBinary array can store up to [`i64::MAX`] bytes
    /// of binary data in total.
    LargeBinary,
    /// Opaque binary data of variable length.
    ///
    /// Logically the same as [`Binary`], but the internal representation uses a view
    /// struct that contains the value length and either the value's entire data
    /// inline (for small values) or an inlined prefix, an index of another buffer,
    /// and an offset pointing to a slice in that buffer (for non-small values).
    ///
    /// [`Binary`]: Self::Binary
    BinaryView,
    /// A variable-length string in Unicode with UTF-8 encoding.
    ///
    /// A single Utf8 array can store up to [`i32::MAX`] bytes
    /// of string data in total.
    Utf8,
    /// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets.
    ///
    /// A single LargeUtf8 array can store up to [`i64::MAX`] bytes
    /// of string data in total.
    LargeUtf8,
    /// A variable-length string in Unicode with UTF-8 encoding.
    ///
    /// Logically the same as [`Utf8`], but the internal representation uses a view
    /// struct that contains the string length and either the string's entire data
    /// inline (for small strings) or an inlined prefix, an index of another buffer,
    /// and an offset pointing to a slice in that buffer (for non-small strings).
    ///
    /// [`Utf8`]: Self::Utf8
    Utf8View,
    /// A list of some logical data type with variable length.
    ///
    /// A single List array can store up to [`i32::MAX`] elements in total.
    List(FieldRef),

    /// (NOT YET FULLY SUPPORTED)  A list of some logical data type with variable length.
    ///
    /// Logically the same as [`List`], but the internal representation differs in how child
    /// data is referenced, allowing flexibility in how data is laid out.
    ///
    /// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
    ///
    /// [`List`]: Self::List
    ListView(FieldRef),
    /// A list of some logical data type with fixed length.
    FixedSizeList(FieldRef, i32),
    /// A list of some logical data type with variable length and 64-bit offsets.
    ///
    /// A single LargeList array can store up to [`i64::MAX`] elements in total.
    LargeList(FieldRef),

    /// (NOT YET FULLY SUPPORTED)  A list of some logical data type with variable length and 64-bit offsets.
    ///
    /// Logically the same as [`LargeList`], but the internal representation differs in how child
    /// data is referenced, allowing flexibility in how data is laid out.
    ///
    /// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
    ///
    /// [`LargeList`]: Self::LargeList
    LargeListView(FieldRef),
    /// A nested datatype that contains a number of sub-fields.
    Struct(Fields),
    /// A nested datatype that can represent slots of differing types. Components:
    ///
    /// 1. [`UnionFields`]
    /// 2. The type of union (Sparse or Dense)
    Union(UnionFields, UnionMode),
    /// A dictionary encoded array (`key_type`, `value_type`), where
    /// each array element is an index of `key_type` into an
    /// associated dictionary of `value_type`.
    ///
    /// Dictionary arrays are used to store columns of `value_type`
    /// that contain many repeated values using less memory, but with
    /// a higher CPU overhead for some operations.
    ///
    /// This type is mostly used to represent low cardinality string
    /// arrays or a limited set of primitive types as integers.
    Dictionary(Box<DataType>, Box<DataType>),
    /// Exact 32-bit width decimal value with precision and scale
    ///
    /// * precision is the total number of digits
    /// * scale is the number of digits past the decimal
    ///
    /// For example the number 123.45 has precision 5 and scale 2.
    ///
    /// In certain situations, scale can be a negative number. For
    /// a negative scale, it is the number of padding zeros to the right
    /// of the digits.
    ///
    /// For example the number 12300 could be treated as a decimal
    /// that has precision 3 and scale -2.
    Decimal32(u8, i8),
    /// Exact 64-bit width decimal value with precision and scale
    ///
    /// * precision is the total number of digits
    /// * scale is the number of digits past the decimal
    ///
    /// For example the number 123.45 has precision 5 and scale 2.
    ///
    /// In certain situations, scale can be a negative number. For
    /// a negative scale, it is the number of padding zeros to the right
    /// of the digits.
    ///
    /// For example the number 12300 could be treated as a decimal
    /// that has precision 3 and scale -2.
    Decimal64(u8, i8),
    /// Exact 128-bit width decimal value with precision and scale
    ///
    /// * precision is the total number of digits
    /// * scale is the number of digits past the decimal
    ///
    /// For example the number 123.45 has precision 5 and scale 2.
    ///
    /// In certain situations, scale can be a negative number. For
    /// a negative scale, it is the number of padding zeros to the right
    /// of the digits.
    ///
    /// For example the number 12300 could be treated as a decimal
    /// that has precision 3 and scale -2.
    Decimal128(u8, i8),
    /// Exact 256-bit width decimal value with precision and scale
    ///
    /// * precision is the total number of digits
    /// * scale is the number of digits past the decimal
    ///
    /// For example the number 123.45 has precision 5 and scale 2.
    ///
    /// In certain situations, scale can be a negative number. For
    /// a negative scale, it is the number of padding zeros to the right
    /// of the digits.
    ///
    /// For example the number 12300 could be treated as a decimal
    /// that has precision 3 and scale -2.
    Decimal256(u8, i8),
    /// A Map is a logical nested type that is represented as
    ///
    /// `List<entries: Struct<key: K, value: V>>`
    ///
    /// The keys and values are each respectively contiguous.
    /// The key and value types are not constrained, but keys should be
    /// hashable and unique.
    /// Whether the keys are sorted can be set in the `bool` after the `Field`.
    ///
    /// In a field with Map type, the field has a child Struct field, which then
    /// has two children: the first the key type and the second the value type. The names of the
    /// child fields may be respectively "entries", "key", and "value", but this is
    /// not enforced.
    Map(FieldRef, bool),
    /// A run-end encoding (REE) is a variation of run-length encoding (RLE). These
    /// encodings are well-suited for representing data containing sequences of the
    /// same value, called runs. Each run is represented as a value and an integer giving
    /// the index in the array where the run ends.
    ///
    /// A run-end encoded array has no buffers by itself, but has two child arrays. The
    /// first child array, called the run ends array, holds either 16, 32, or 64-bit
    /// signed integers. The actual values of each run are held in the second child array.
    ///
    /// These child arrays are prescribed the standard names of "run_ends" and "values"
    /// respectively.
    RunEndEncoded(FieldRef, FieldRef),
}
443
/// The unit in which a length of time is expressed: seconds, milliseconds,
/// microseconds or nanoseconds.
///
/// Used as the unit parameter of the temporal [`DataType`] variants
/// `Timestamp`, `Time32`, `Time64` and `Duration`.
///
/// Variants are declared from the coarsest unit to the finest, and the
/// derived ordering follows that declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TimeUnit {
    /// Time in seconds.
    Second,
    /// Time in milliseconds.
    Millisecond,
    /// Time in microseconds.
    Microsecond,
    /// Time in nanoseconds.
    Nanosecond,
}
457
/// The layout of an interval value, following the SQL-style
/// YEAR_MONTH, DAY_TIME and MONTH_DAY_NANO interval kinds.
///
/// Used as the parameter of [`DataType::Interval`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum IntervalUnit {
    /// Indicates the number of elapsed whole months, stored as 4-byte integers.
    YearMonth,
    /// Indicates the number of elapsed days and milliseconds,
    /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total).
    DayTime,
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// The values are stored contiguously in 16 byte blocks. Months and
    /// days are encoded as 32 bit integers and nanoseconds is encoded as a
    /// 64 bit integer. All integers are signed. Each field is independent
    /// (e.g. there is no constraint that nanoseconds have the same sign
    /// as days or that the quantity of nanoseconds represents less
    /// than a day's worth of time).
    MonthDayNano,
}
476
/// The layout of a union: sparse or dense.
///
/// Used as the second parameter of [`DataType::Union`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum UnionMode {
    /// Sparse union layout
    Sparse,
    /// Dense union layout
    Dense,
}
486
487impl fmt::Display for DataType {
488 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
489 match &self {
490 DataType::Struct(fields) => {
491 write!(f, "Struct(")?;
492 if !fields.is_empty() {
493 let fields_str = fields
494 .iter()
495 .map(|f| format!("{} {}", f.name(), f.data_type()))
496 .collect::<Vec<_>>()
497 .join(", ");
498 write!(f, "{fields_str}")?;
499 }
500 write!(f, ")")?;
501 Ok(())
502 }
503 _ => write!(f, "{self:?}"),
504 }
505 }
506}
507
508/// Parses `str` into a `DataType`.
509///
510/// This is the reverse of [`DataType`]'s `Display`
511/// impl, and maintains the invariant that
512/// `DataType::try_from(&data_type.to_string()).unwrap() == data_type`
513///
514/// # Example
515/// ```
516/// use arrow_schema::DataType;
517///
518/// let data_type: DataType = "Int32".parse().unwrap();
519/// assert_eq!(data_type, DataType::Int32);
520/// ```
521impl FromStr for DataType {
522 type Err = ArrowError;
523
524 fn from_str(s: &str) -> Result<Self, Self::Err> {
525 crate::datatype_parse::parse_data_type(s)
526 }
527}
528
529impl TryFrom<&str> for DataType {
530 type Error = ArrowError;
531
532 fn try_from(value: &str) -> Result<Self, Self::Error> {
533 value.parse()
534 }
535}
536
537impl DataType {
538 /// Returns true if the type is primitive: (numeric, temporal).
539 #[inline]
540 pub fn is_primitive(&self) -> bool {
541 self.is_numeric() || self.is_temporal()
542 }
543
544 /// Returns true if this type is numeric: (UInt*, Int*, Float*, Decimal*).
545 #[inline]
546 pub fn is_numeric(&self) -> bool {
547 use DataType::*;
548 matches!(
549 self,
550 UInt8
551 | UInt16
552 | UInt32
553 | UInt64
554 | Int8
555 | Int16
556 | Int32
557 | Int64
558 | Float16
559 | Float32
560 | Float64
561 | Decimal32(_, _)
562 | Decimal64(_, _)
563 | Decimal128(_, _)
564 | Decimal256(_, _)
565 )
566 }
567
568 /// Returns true if this type is temporal: (Date*, Time*, Duration, or Interval).
569 #[inline]
570 pub fn is_temporal(&self) -> bool {
571 use DataType::*;
572 matches!(
573 self,
574 Date32 | Date64 | Timestamp(_, _) | Time32(_) | Time64(_) | Duration(_) | Interval(_)
575 )
576 }
577
578 /// Returns true if this type is floating: (Float*).
579 #[inline]
580 pub fn is_floating(&self) -> bool {
581 use DataType::*;
582 matches!(self, Float16 | Float32 | Float64)
583 }
584
585 /// Returns true if this type is integer: (Int*, UInt*).
586 #[inline]
587 pub fn is_integer(&self) -> bool {
588 self.is_signed_integer() || self.is_unsigned_integer()
589 }
590
591 /// Returns true if this type is signed integer: (Int*).
592 #[inline]
593 pub fn is_signed_integer(&self) -> bool {
594 use DataType::*;
595 matches!(self, Int8 | Int16 | Int32 | Int64)
596 }
597
598 /// Returns true if this type is unsigned integer: (UInt*).
599 #[inline]
600 pub fn is_unsigned_integer(&self) -> bool {
601 use DataType::*;
602 matches!(self, UInt8 | UInt16 | UInt32 | UInt64)
603 }
604
605 /// Returns true if this type is valid as a dictionary key
606 #[inline]
607 pub fn is_dictionary_key_type(&self) -> bool {
608 self.is_integer()
609 }
610
611 /// Returns true if this type is valid for run-ends array in RunArray
612 #[inline]
613 pub fn is_run_ends_type(&self) -> bool {
614 use DataType::*;
615 matches!(self, Int16 | Int32 | Int64)
616 }
617
618 /// Returns true if this type is nested (List, FixedSizeList, LargeList, ListView. LargeListView, Struct, Union,
619 /// or Map), or a dictionary of a nested type
620 #[inline]
621 pub fn is_nested(&self) -> bool {
622 use DataType::*;
623 match self {
624 Dictionary(_, v) => DataType::is_nested(v.as_ref()),
625 RunEndEncoded(_, v) => DataType::is_nested(v.data_type()),
626 List(_)
627 | FixedSizeList(_, _)
628 | LargeList(_)
629 | ListView(_)
630 | LargeListView(_)
631 | Struct(_)
632 | Union(_, _)
633 | Map(_, _) => true,
634 _ => false,
635 }
636 }
637
638 /// Returns true if this type is DataType::Null.
639 #[inline]
640 pub fn is_null(&self) -> bool {
641 use DataType::*;
642 matches!(self, Null)
643 }
644
645 /// Compares the datatype with another, ignoring nested field names
646 /// and metadata.
647 pub fn equals_datatype(&self, other: &DataType) -> bool {
648 match (&self, other) {
649 (DataType::List(a), DataType::List(b))
650 | (DataType::LargeList(a), DataType::LargeList(b))
651 | (DataType::ListView(a), DataType::ListView(b))
652 | (DataType::LargeListView(a), DataType::LargeListView(b)) => {
653 a.is_nullable() == b.is_nullable() && a.data_type().equals_datatype(b.data_type())
654 }
655 (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => {
656 a_size == b_size
657 && a.is_nullable() == b.is_nullable()
658 && a.data_type().equals_datatype(b.data_type())
659 }
660 (DataType::Struct(a), DataType::Struct(b)) => {
661 a.len() == b.len()
662 && a.iter().zip(b).all(|(a, b)| {
663 a.is_nullable() == b.is_nullable()
664 && a.data_type().equals_datatype(b.data_type())
665 })
666 }
667 (DataType::Map(a_field, a_is_sorted), DataType::Map(b_field, b_is_sorted)) => {
668 a_field.is_nullable() == b_field.is_nullable()
669 && a_field.data_type().equals_datatype(b_field.data_type())
670 && a_is_sorted == b_is_sorted
671 }
672 (DataType::Dictionary(a_key, a_value), DataType::Dictionary(b_key, b_value)) => {
673 a_key.equals_datatype(b_key) && a_value.equals_datatype(b_value)
674 }
675 (
676 DataType::RunEndEncoded(a_run_ends, a_values),
677 DataType::RunEndEncoded(b_run_ends, b_values),
678 ) => {
679 a_run_ends.is_nullable() == b_run_ends.is_nullable()
680 && a_run_ends
681 .data_type()
682 .equals_datatype(b_run_ends.data_type())
683 && a_values.is_nullable() == b_values.is_nullable()
684 && a_values.data_type().equals_datatype(b_values.data_type())
685 }
686 (
687 DataType::Union(a_union_fields, a_union_mode),
688 DataType::Union(b_union_fields, b_union_mode),
689 ) => {
690 a_union_mode == b_union_mode
691 && a_union_fields.len() == b_union_fields.len()
692 && a_union_fields.iter().all(|a| {
693 b_union_fields.iter().any(|b| {
694 a.0 == b.0
695 && a.1.is_nullable() == b.1.is_nullable()
696 && a.1.data_type().equals_datatype(b.1.data_type())
697 })
698 })
699 }
700 _ => self == other,
701 }
702 }
703
704 /// Returns the byte width of this type if it is a primitive type
705 ///
706 /// Returns `None` if not a primitive type
707 #[inline]
708 pub fn primitive_width(&self) -> Option<usize> {
709 match self {
710 DataType::Null => None,
711 DataType::Boolean => None,
712 DataType::Int8 | DataType::UInt8 => Some(1),
713 DataType::Int16 | DataType::UInt16 | DataType::Float16 => Some(2),
714 DataType::Int32 | DataType::UInt32 | DataType::Float32 => Some(4),
715 DataType::Int64 | DataType::UInt64 | DataType::Float64 => Some(8),
716 DataType::Timestamp(_, _) => Some(8),
717 DataType::Date32 | DataType::Time32(_) => Some(4),
718 DataType::Date64 | DataType::Time64(_) => Some(8),
719 DataType::Duration(_) => Some(8),
720 DataType::Interval(IntervalUnit::YearMonth) => Some(4),
721 DataType::Interval(IntervalUnit::DayTime) => Some(8),
722 DataType::Interval(IntervalUnit::MonthDayNano) => Some(16),
723 DataType::Decimal32(_, _) => Some(4),
724 DataType::Decimal64(_, _) => Some(8),
725 DataType::Decimal128(_, _) => Some(16),
726 DataType::Decimal256(_, _) => Some(32),
727 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None,
728 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => None,
729 DataType::FixedSizeBinary(_) => None,
730 DataType::List(_)
731 | DataType::ListView(_)
732 | DataType::LargeList(_)
733 | DataType::LargeListView(_)
734 | DataType::Map(_, _) => None,
735 DataType::FixedSizeList(_, _) => None,
736 DataType::Struct(_) => None,
737 DataType::Union(_, _) => None,
738 DataType::Dictionary(_, _) => None,
739 DataType::RunEndEncoded(_, _) => None,
740 }
741 }
742
743 /// Return size of this instance in bytes.
744 ///
745 /// Includes the size of `Self`.
746 pub fn size(&self) -> usize {
747 std::mem::size_of_val(self)
748 + match self {
749 DataType::Null
750 | DataType::Boolean
751 | DataType::Int8
752 | DataType::Int16
753 | DataType::Int32
754 | DataType::Int64
755 | DataType::UInt8
756 | DataType::UInt16
757 | DataType::UInt32
758 | DataType::UInt64
759 | DataType::Float16
760 | DataType::Float32
761 | DataType::Float64
762 | DataType::Date32
763 | DataType::Date64
764 | DataType::Time32(_)
765 | DataType::Time64(_)
766 | DataType::Duration(_)
767 | DataType::Interval(_)
768 | DataType::Binary
769 | DataType::FixedSizeBinary(_)
770 | DataType::LargeBinary
771 | DataType::BinaryView
772 | DataType::Utf8
773 | DataType::LargeUtf8
774 | DataType::Utf8View
775 | DataType::Decimal32(_, _)
776 | DataType::Decimal64(_, _)
777 | DataType::Decimal128(_, _)
778 | DataType::Decimal256(_, _) => 0,
779 DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(),
780 DataType::List(field)
781 | DataType::ListView(field)
782 | DataType::FixedSizeList(field, _)
783 | DataType::LargeList(field)
784 | DataType::LargeListView(field)
785 | DataType::Map(field, _) => field.size(),
786 DataType::Struct(fields) => fields.size(),
787 DataType::Union(fields, _) => fields.size(),
788 DataType::Dictionary(dt1, dt2) => dt1.size() + dt2.size(),
789 DataType::RunEndEncoded(run_ends, values) => {
790 run_ends.size() - std::mem::size_of_val(run_ends) + values.size()
791 - std::mem::size_of_val(values)
792 }
793 }
794 }
795
796 /// Check to see if `self` is a superset of `other`
797 ///
798 /// If DataType is a nested type, then it will check to see if the nested type is a superset of the other nested type
799 /// else it will check to see if the DataType is equal to the other DataType
800 pub fn contains(&self, other: &DataType) -> bool {
801 match (self, other) {
802 (DataType::List(f1), DataType::List(f2))
803 | (DataType::LargeList(f1), DataType::LargeList(f2))
804 | (DataType::ListView(f1), DataType::ListView(f2))
805 | (DataType::LargeListView(f1), DataType::LargeListView(f2)) => f1.contains(f2),
806 (DataType::FixedSizeList(f1, s1), DataType::FixedSizeList(f2, s2)) => {
807 s1 == s2 && f1.contains(f2)
808 }
809 (DataType::Map(f1, s1), DataType::Map(f2, s2)) => s1 == s2 && f1.contains(f2),
810 (DataType::Struct(f1), DataType::Struct(f2)) => f1.contains(f2),
811 (DataType::Union(f1, s1), DataType::Union(f2, s2)) => {
812 s1 == s2
813 && f1
814 .iter()
815 .all(|f1| f2.iter().any(|f2| f1.0 == f2.0 && f1.1.contains(f2.1)))
816 }
817 (DataType::Dictionary(k1, v1), DataType::Dictionary(k2, v2)) => {
818 k1.contains(k2) && v1.contains(v2)
819 }
820 _ => self == other,
821 }
822 }
823
824 /// Create a [`DataType::List`] with elements of the specified type
825 /// and nullability, and conventionally named inner [`Field`] (`"item"`).
826 ///
827 /// To specify field level metadata, construct the inner [`Field`]
828 /// directly via [`Field::new`] or [`Field::new_list_field`].
829 pub fn new_list(data_type: DataType, nullable: bool) -> Self {
830 DataType::List(Arc::new(Field::new_list_field(data_type, nullable)))
831 }
832
833 /// Create a [`DataType::LargeList`] with elements of the specified type
834 /// and nullability, and conventionally named inner [`Field`] (`"item"`).
835 ///
836 /// To specify field level metadata, construct the inner [`Field`]
837 /// directly via [`Field::new`] or [`Field::new_list_field`].
838 pub fn new_large_list(data_type: DataType, nullable: bool) -> Self {
839 DataType::LargeList(Arc::new(Field::new_list_field(data_type, nullable)))
840 }
841
842 /// Create a [`DataType::FixedSizeList`] with elements of the specified type, size
843 /// and nullability, and conventionally named inner [`Field`] (`"item"`).
844 ///
845 /// To specify field level metadata, construct the inner [`Field`]
846 /// directly via [`Field::new`] or [`Field::new_list_field`].
847 pub fn new_fixed_size_list(data_type: DataType, size: i32, nullable: bool) -> Self {
848 DataType::FixedSizeList(Arc::new(Field::new_list_field(data_type, nullable)), size)
849 }
850}
851
/// Upper bound on the precision of [`DataType::Decimal32`] values
pub const DECIMAL32_MAX_PRECISION: u8 = 9;

/// Upper bound on the scale of [`DataType::Decimal32`] values
// Numerically equal to the maximum precision; the cast is lossless as 9 fits in `i8`
pub const DECIMAL32_MAX_SCALE: i8 = DECIMAL32_MAX_PRECISION as i8;

/// Upper bound on the precision of [`DataType::Decimal64`] values
pub const DECIMAL64_MAX_PRECISION: u8 = 18;

/// Upper bound on the scale of [`DataType::Decimal64`] values
// Numerically equal to the maximum precision; the cast is lossless as 18 fits in `i8`
pub const DECIMAL64_MAX_SCALE: i8 = DECIMAL64_MAX_PRECISION as i8;

/// Upper bound on the precision of [`DataType::Decimal128`] values
pub const DECIMAL128_MAX_PRECISION: u8 = 38;

/// Upper bound on the scale of [`DataType::Decimal128`] values
// Numerically equal to the maximum precision; the cast is lossless as 38 fits in `i8`
pub const DECIMAL128_MAX_SCALE: i8 = DECIMAL128_MAX_PRECISION as i8;

/// Upper bound on the precision of [`DataType::Decimal256`] values
pub const DECIMAL256_MAX_PRECISION: u8 = 76;

/// Upper bound on the scale of [`DataType::Decimal256`] values
// Numerically equal to the maximum precision; the cast is lossless as 76 fits in `i8`
pub const DECIMAL256_MAX_SCALE: i8 = DECIMAL256_MAX_PRECISION as i8;

/// Scale used by default for [`DataType::Decimal32`] values
pub const DECIMAL32_DEFAULT_SCALE: i8 = 2;

/// Scale used by default for [`DataType::Decimal64`] values
pub const DECIMAL64_DEFAULT_SCALE: i8 = 6;

/// Scale used by default for both [`DataType::Decimal128`] and
/// [`DataType::Decimal256`] values
pub const DECIMAL_DEFAULT_SCALE: i8 = 10;
885
#[cfg(test)]
mod tests {
    use super::*;

    /// A nested struct type must survive a round trip through the derived serde
    /// JSON representation, and serialize to the exact string pinned below.
    #[test]
    #[cfg(feature = "serde")]
    fn serde_struct_type() {
        use std::collections::HashMap;

        // Populated metadata is emitted as the JSON object {"k":"v"}
        let metadata: HashMap<String, String> =
            HashMap::from([("k".to_string(), "v".to_string())]);
        let first_name = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata);

        // Empty metadata is still emitted, as the empty JSON object {}
        let last_name =
            Field::new("last_name", DataType::Utf8, false).with_metadata(HashMap::new());

        let address = Field::new(
            "address",
            DataType::Struct(Fields::from(vec![
                Field::new("street", DataType::Utf8, false),
                Field::new("zip", DataType::UInt16, false),
            ])),
            false,
        );
        let person = DataType::Struct(Fields::from(vec![first_name, last_name, address]));

        let serialized = serde_json::to_string(&person).unwrap();

        // NOTE: this pins the default (derived) serialization format, not the
        // JSON format specified in metadata.md
        let expected = "{\"Struct\":[\
             {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\
             {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\
             {\"name\":\"address\",\"data_type\":{\"Struct\":\
             [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\
             {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}\
             ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}]}";
        assert_eq!(expected, serialized);

        let deserialized: DataType = serde_json::from_str(&serialized).unwrap();
        assert_eq!(person, deserialized);
    }

    /// `equals_datatype` must ignore the names of nested fields while still
    /// honouring their types, nullability and the remaining type parameters.
    #[test]
    fn test_list_datatype_equality() {
        // Local shorthands for the nested types that are built repeatedly below
        let list_of = |child: Field| DataType::List(Arc::new(child));
        let fixed_list_of = |child: Field| DataType::FixedSizeList(Arc::new(child), 3);
        let struct_of = |children: Vec<Field>| DataType::Struct(Fields::from(children));
        let map_of = |entries: Field, flag: bool| DataType::Map(Arc::new(entries), flag);
        let dictionary_of =
            |key: DataType, value: DataType| DataType::Dictionary(Box::new(key), Box::new(value));
        let sparse_union_of = |type_ids: Vec<i8>, children: Vec<Field>| {
            DataType::Union(UnionFields::new(type_ids, children), UnionMode::Sparse)
        };
        let run_end_encoded_of = |run_ends: Field, values: Field| {
            DataType::RunEndEncoded(Arc::new(run_ends), Arc::new(values))
        };

        // --- List: the child name is ignored, nullability and type are not
        let list_a = list_of(Field::new_list_field(DataType::Int32, true));
        let list_b = list_of(Field::new("array", DataType::Int32, true));
        let list_c = list_of(Field::new_list_field(DataType::Int32, false));
        let list_d = list_of(Field::new_list_field(DataType::UInt32, true));
        assert!(list_a.equals_datatype(&list_b));
        assert!(!list_a.equals_datatype(&list_c));
        assert!(!list_b.equals_datatype(&list_c));
        assert!(!list_a.equals_datatype(&list_d));

        // --- FixedSizeList wrapping the lists above
        let list_e = fixed_list_of(Field::new_list_field(list_a.clone(), false));
        let list_f = fixed_list_of(Field::new("array", list_b.clone(), false));
        let list_g = fixed_list_of(Field::new_list_field(DataType::FixedSizeBinary(3), true));
        assert!(list_e.equals_datatype(&list_f));
        assert!(!list_e.equals_datatype(&list_g));
        assert!(!list_f.equals_datatype(&list_g));

        // --- Struct
        let list_h = struct_of(vec![Field::new("f1", list_e, true)]);
        let list_i = struct_of(vec![Field::new("f1", list_f.clone(), true)]);
        let list_j = struct_of(vec![Field::new("f1", list_f.clone(), false)]);
        let list_k = struct_of(vec![
            Field::new("f1", list_f.clone(), false),
            Field::new("f2", list_g.clone(), false),
            Field::new("f3", DataType::Utf8, true),
        ]);
        let list_l = struct_of(vec![
            Field::new("ff1", list_f.clone(), false),
            Field::new("ff2", list_g.clone(), false),
            Field::new("ff3", DataType::LargeUtf8, true),
        ]);
        let list_m = struct_of(vec![
            Field::new("ff1", list_f, false),
            Field::new("ff2", list_g, false),
            Field::new("ff3", DataType::Utf8, true),
        ]);
        assert!(list_h.equals_datatype(&list_i));
        assert!(!list_h.equals_datatype(&list_j));
        assert!(!list_k.equals_datatype(&list_l));
        assert!(list_k.equals_datatype(&list_m));

        // --- Map
        let list_n = map_of(Field::new("f1", list_a.clone(), true), true);
        let list_o = map_of(Field::new("f2", list_b.clone(), true), true);
        let list_p = map_of(Field::new("f2", list_b.clone(), true), false);
        let list_q = map_of(Field::new("f2", list_c.clone(), true), true);
        let list_r = map_of(Field::new("f1", list_a.clone(), false), true);
        assert!(list_n.equals_datatype(&list_o));
        assert!(!list_n.equals_datatype(&list_p));
        assert!(!list_n.equals_datatype(&list_q));
        assert!(!list_n.equals_datatype(&list_r));

        // --- Dictionary
        let list_s = dictionary_of(DataType::UInt8, list_a);
        let list_t = dictionary_of(DataType::UInt8, list_b.clone());
        let list_u = dictionary_of(DataType::Int8, list_b);
        let list_v = dictionary_of(DataType::UInt8, list_c);
        assert!(list_s.equals_datatype(&list_t));
        assert!(!list_s.equals_datatype(&list_u));
        assert!(!list_s.equals_datatype(&list_v));

        // --- Union: entries are matched by type id, so their order may differ
        let union_a = sparse_union_of(
            vec![1, 2],
            vec![
                Field::new("f1", DataType::Utf8, false),
                Field::new("f2", DataType::UInt8, false),
            ],
        );
        let union_b = sparse_union_of(
            vec![1, 2],
            vec![
                Field::new("ff1", DataType::Utf8, false),
                Field::new("ff2", DataType::UInt8, false),
            ],
        );
        let union_c = sparse_union_of(
            vec![2, 1],
            vec![
                Field::new("fff2", DataType::UInt8, false),
                Field::new("fff1", DataType::Utf8, false),
            ],
        );
        let union_d = sparse_union_of(
            vec![2, 1],
            vec![
                Field::new("fff1", DataType::Int8, false),
                Field::new("fff2", DataType::UInt8, false),
            ],
        );
        let union_e = sparse_union_of(
            vec![1, 2],
            vec![
                Field::new("f1", DataType::Utf8, true),
                Field::new("f2", DataType::UInt8, false),
            ],
        );
        assert!(union_a.equals_datatype(&union_b));
        assert!(union_a.equals_datatype(&union_c));
        assert!(!union_a.equals_datatype(&union_d));
        assert!(!union_a.equals_datatype(&union_e));

        // --- RunEndEncoded
        let list_w = run_end_encoded_of(
            Field::new("f1", DataType::Int64, true),
            Field::new("f2", DataType::Utf8, true),
        );
        let list_x = run_end_encoded_of(
            Field::new("ff1", DataType::Int64, true),
            Field::new("ff2", DataType::Utf8, true),
        );
        let list_y = run_end_encoded_of(
            Field::new("ff1", DataType::UInt16, true),
            Field::new("ff2", DataType::Utf8, true),
        );
        let list_z = run_end_encoded_of(
            Field::new("f1", DataType::Int64, false),
            Field::new("f2", DataType::Utf8, true),
        );
        assert!(list_w.equals_datatype(&list_x));
        assert!(!list_w.equals_datatype(&list_y));
        assert!(!list_w.equals_datatype(&list_z));
    }

    /// Smoke test: a struct nested inside another struct can be constructed.
    #[test]
    fn create_struct_type() {
        let address = Field::new(
            "address",
            DataType::Struct(Fields::from(vec![
                Field::new("street", DataType::Utf8, false),
                Field::new("zip", DataType::UInt16, false),
            ])),
            false,
        );
        let _person = DataType::Struct(Fields::from(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
            address,
        ]));
    }

    /// `is_nested` is true for list-like types and for dictionaries whose value
    /// type is itself nested.
    #[test]
    fn test_nested() {
        let utf8_child = || Arc::new(Field::new("foo", DataType::Utf8, true));
        let list = DataType::List(utf8_child());
        let list_view = DataType::ListView(utf8_child());
        let large_list_view = DataType::LargeListView(utf8_child());

        assert!(!DataType::Boolean.is_nested());
        assert!(!DataType::Int32.is_nested());
        assert!(!DataType::Utf8.is_nested());
        assert!(list.is_nested());
        assert!(list_view.is_nested());
        assert!(large_list_view.is_nested());

        // Dictionaries keyed by Int32: nestedness follows the value type
        let int32_dictionary = |values: DataType| {
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(values))
        };
        assert!(!int32_dictionary(DataType::Boolean).is_nested());
        assert!(!int32_dictionary(DataType::Int64).is_nested());
        assert!(!int32_dictionary(DataType::LargeUtf8).is_nested());
        assert!(int32_dictionary(list).is_nested());
    }

    /// Integer classification helpers, checked against one signed integer, one
    /// unsigned integer and one floating point type.
    #[test]
    fn test_integer() {
        let signed = DataType::Int32;
        let unsigned = DataType::UInt64;
        let float = DataType::Float16;

        // is_integer
        assert!(signed.is_integer());
        assert!(unsigned.is_integer());
        assert!(!float.is_integer());

        // is_signed_integer
        assert!(signed.is_signed_integer());
        assert!(!unsigned.is_signed_integer());
        assert!(!float.is_signed_integer());

        // is_unsigned_integer
        assert!(!signed.is_unsigned_integer());
        assert!(unsigned.is_unsigned_integer());
        assert!(!float.is_unsigned_integer());

        // is_dictionary_key_type
        assert!(signed.is_dictionary_key_type());
        assert!(unsigned.is_dictionary_key_type());
        assert!(!float.is_dictionary_key_type());
    }

    /// `is_floating` accepts Float16 and rejects Int32.
    #[test]
    fn test_floating() {
        assert!(DataType::Float16.is_floating());
        assert!(!DataType::Int32.is_floating());
    }

    /// `is_null` accepts only the Null type among the two checked here.
    #[test]
    fn test_datatype_is_null() {
        assert!(DataType::Null.is_null());
        assert!(!DataType::Int32.is_null());
    }

    /// Guards against accidental growth of the `DataType` enum.
    #[test]
    fn size_should_not_regress() {
        let actual = std::mem::size_of::<DataType>();
        assert_eq!(actual, 24);
    }

    /// Building union fields with a repeated type id must panic.
    #[test]
    #[should_panic(expected = "duplicate type id: 1")]
    fn test_union_with_duplicated_type_id() {
        // Type id `1` is deliberately listed twice
        let type_ids = vec![1, 1];
        let children = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Utf8, false),
        ];
        let _union = DataType::Union(UnionFields::new(type_ids, children), UnionMode::Dense);
    }

    /// A type name can be converted through the `TryFrom<&str>` implementation.
    #[test]
    fn test_try_from_str() {
        let data_type = DataType::try_from("Int32").unwrap();
        assert_eq!(data_type, DataType::Int32);
    }

    /// A type name can be converted through `str::parse`.
    #[test]
    fn test_from_str() {
        let data_type = "UInt64".parse::<DataType>().unwrap();
        assert_eq!(data_type, DataType::UInt64);
    }
}