parquet_variant/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType};
19pub use self::list::VariantList;
20pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
21pub use self::object::VariantObject;
22
// Publicly export types used in the API
24pub use half::f16;
25pub use uuid::Uuid;
26
27use crate::decoder::{
28 self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
29};
30use crate::path::{VariantPath, VariantPathElement};
31use crate::utils::{first_byte_from_slice, slice_from_slice};
32use arrow::array::ArrowNativeTypeOp;
33use arrow::compute::{
34 DecimalCast, cast_num_to_bool, cast_single_string_to_boolean_default, num_cast,
35 parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy,
36 single_float_to_decimal,
37};
38use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType};
39
40use arrow_schema::ArrowError;
41use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
42use num_traits::NumCast;
43use std::ops::Deref;
44
45mod decimal;
46mod list;
47mod metadata;
48mod object;
49
/// Maximum length, in bytes, of a Variant short string (63).
const MAX_SHORT_STRING_BYTES: usize = 63;
51
/// A string slice that fits the Variant short-string encoding.
///
/// This is a zero-cost newtype around `&str`. Values built through
/// [`ShortString::try_new`] (or the matching `TryFrom<&str>` impl) are guaranteed to be
/// at most 63 bytes long, the maximum length of a Variant short string.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ShortString<'a>(pub(crate) &'a str);
58
59impl<'a> ShortString<'a> {
60 /// Attempts to interpret `value` as a variant short string value.
61 ///
62 /// # Errors
63 ///
64 /// Returns an error if `value` is longer than the maximum allowed length
65 /// of a Variant short string (63 bytes).
66 pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
67 if value.len() > MAX_SHORT_STRING_BYTES {
68 return Err(ArrowError::InvalidArgumentError(format!(
69 "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
70 )));
71 }
72
73 Ok(Self(value))
74 }
75
76 /// Returns the underlying Variant short string as a &str
77 pub fn as_str(&self) -> &'a str {
78 self.0
79 }
80}
81
82impl<'a> From<ShortString<'a>> for &'a str {
83 fn from(value: ShortString<'a>) -> Self {
84 value.0
85 }
86}
87
88impl<'a> TryFrom<&'a str> for ShortString<'a> {
89 type Error = ArrowError;
90
91 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
92 Self::try_new(value)
93 }
94}
95
96impl AsRef<str> for ShortString<'_> {
97 fn as_ref(&self) -> &str {
98 self.0
99 }
100}
101
102impl Deref for ShortString<'_> {
103 type Target = str;
104
105 fn deref(&self) -> &Self::Target {
106 self.0
107 }
108}
109
/// Represents a [Parquet Variant]
///
/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
///
/// # Background
///
/// The [specification] says:
///
/// The Variant Binary Encoding allows representation of semi-structured data
/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
/// intended to allow efficient access to nested data even in the presence of
/// very wide or deep structures.
///
/// Another motivation for the representation is that (aside from metadata) each
/// nested Variant value is contiguous and self-contained. For example, in a
/// Variant containing an Array of Variant values, the representation of an
/// inner Variant value, when paired with the metadata of the full variant, is
/// itself a valid Variant.
///
/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
/// refers to extracting some elements of the variant into separate columns for
/// more efficient extraction/filter pushdown. The [Variant Shredding
/// specification] describes the details of shredding Variant values as typed
/// Parquet columns.
///
/// A Variant represents a type that contains one of:
///
/// * Primitive: A type and corresponding value (e.g. INT, STRING)
///
/// * Array: An ordered list of Variant values
///
/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
///   pairs). An object may not contain duplicate keys.
///
/// # Encoding
///
/// A Variant is encoded with 2 binary values, the value and the metadata. The
/// metadata stores a header and an optional dictionary of field names which are
/// referred to by offset in the value. The value is a binary representation of
/// the actual data, and varies depending on the type.
///
/// # Design Goals
///
/// The design goals of the Rust API are as follows:
/// 1. Speed / Zero copy access (no `clone`ing is required)
/// 2. Safety
/// 3. Follow standard Rust conventions
///
/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
///
/// # Casting Semantics
///
/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable.
/// Conversions in this module delegate to Arrow compute cast helpers such as
/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and
/// [`cast_single_string_to_boolean_default`].
///
/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants.
///   Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows
///   Arrow UTF8-to-boolean cast rules.
/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`],
///   [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept
///   boolean and numeric variants (integers, floating-point, and decimals).
///   They return `None` when conversion is not possible.
/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and
///   [`Self::as_decimal16`] accept compatible decimal variants, integer variants,
///   float variants and string variants.
///   They return `None` when conversion is not possible.
///
/// # Examples:
///
/// ## Creating `Variant` from Rust Types
/// ```
/// use parquet_variant::Variant;
/// // variants can be directly constructed
/// let variant = Variant::Int32(123);
/// // or constructed via `From` impls
/// assert_eq!(variant, Variant::from(123i32));
/// ```
/// ## Creating `Variant` from metadata and value
/// ```
/// # use parquet_variant::{Variant, VariantMetadata};
/// let metadata = [0x01, 0x00, 0x00];
/// let value = [0x09, 0x48, 0x49];
/// // parse the header metadata
/// assert_eq!(
///     Variant::from("HI"),
///     Variant::new(&metadata, &value)
/// );
/// ```
///
/// ## Using `Variant` values
/// ```
/// # use parquet_variant::Variant;
/// # let variant = Variant::Int32(123);
/// // variants can be used in match statements like normal enums
/// match variant {
///     Variant::Int32(i) => println!("Integer: {}", i),
///     Variant::String(s) => println!("String: {}", s),
///     _ => println!("Other variant"),
/// }
/// ```
///
/// # Validation
///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant value (see below).
///
/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
/// `v` are the number of bytes in the metadata and value buffers, respectively.
///
/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
/// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
/// used to _validate_ an _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
/// knows the underlying bytes were already validated previously, or if the caller intends to
/// perform a small number of (fallible) accesses to a large variant value.
///
/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
///
/// # Safety
///
/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
/// infallible methods may cause panics but will never lead to undefined behavior.
///
/// [metadata]: VariantMetadata#Validation
/// [object]: VariantObject#Validation
/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive type: Null
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: only need the *value* buffer for these types
    /// Primitive (type_id=1): BINARY
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING
    ShortString(ShortString<'v>),
    // need both metadata & value
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
299
// We don't want this to grow because it could hurt performance of a frequently-created type.
//
// One check per target configuration: the expected size differs between 64-bit s390x,
// other 64-bit targets, and 32-bit targets. Each check is an unnamed `const`, so it is
// evaluated at compile time (presumably `expect_size_of` rejects a mismatching size --
// confirm in `crate::utils`).
#[cfg(all(target_pointer_width = "64", target_arch = "s390x"))]
const _: () = crate::utils::expect_size_of::<Variant>(72);
#[cfg(all(target_pointer_width = "64", not(target_arch = "s390x")))]
const _: () = crate::utils::expect_size_of::<Variant>(80);
#[cfg(target_pointer_width = "32")]
const _: () = crate::utils::expect_size_of::<Variant>(48);
307
/// Distinguishes integer targets from floating-point targets when a decimal variant is
/// converted to a primitive number.
enum NumericKind {
    /// The target is an integer type.
    Integer,
    /// The target is a floating-point type.
    Float,
}
312
313trait DecimalCastTarget: NumCast + Default {
314 const KIND: NumericKind;
315}
316
// Implements `DecimalCastTarget` for the type `$target`, setting its `KIND` constant to
// `$kind`.
macro_rules! impl_decimal_cast_target {
    ($target:ident, $kind:expr) => {
        impl DecimalCastTarget for $target {
            const KIND: NumericKind = $kind;
        }
    };
}
324
// Signed and unsigned integer targets.
impl_decimal_cast_target!(i8, NumericKind::Integer);
impl_decimal_cast_target!(i16, NumericKind::Integer);
impl_decimal_cast_target!(i32, NumericKind::Integer);
impl_decimal_cast_target!(i64, NumericKind::Integer);
impl_decimal_cast_target!(u8, NumericKind::Integer);
impl_decimal_cast_target!(u16, NumericKind::Integer);
impl_decimal_cast_target!(u32, NumericKind::Integer);
impl_decimal_cast_target!(u64, NumericKind::Integer);
// Floating-point targets.
impl_decimal_cast_target!(f16, NumericKind::Float);
impl_decimal_cast_target!(f32, NumericKind::Float);
impl_decimal_cast_target!(f64, NumericKind::Float);
336
337impl<'m, 'v> Variant<'m, 'v> {
338 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
339 ///
340 /// The instance is fully [validated].
341 ///
342 /// # Example
343 /// ```
344 /// use parquet_variant::{Variant, VariantMetadata};
345 /// let metadata = [0x01, 0x00, 0x00];
346 /// let value = [0x09, 0x48, 0x49];
347 /// // parse the header metadata
348 /// assert_eq!(
349 /// Variant::from("HI"),
350 /// Variant::try_new(&metadata, &value).unwrap()
351 /// );
352 /// ```
353 ///
354 /// [validated]: Self#Validation
355 pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
356 let metadata = VariantMetadata::try_new(metadata)?;
357 Self::try_new_with_metadata(metadata, value)
358 }
359
360 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
361 ///
362 /// The instance is [unvalidated].
363 ///
364 /// # Example
365 /// ```
366 /// use parquet_variant::{Variant, VariantMetadata};
367 /// let metadata = [0x01, 0x00, 0x00];
368 /// let value = [0x09, 0x48, 0x49];
369 /// // parse the header metadata
370 /// assert_eq!(
371 /// Variant::from("HI"),
372 /// Variant::new(&metadata, &value)
373 /// );
374 /// ```
375 ///
376 /// [unvalidated]: Self#Validation
377 pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
378 let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
379 .expect("Invalid variant metadata");
380 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
381 .expect("Invalid variant data")
382 }
383
384 /// Create a new variant with existing metadata.
385 ///
386 /// The instance is fully [validated].
387 ///
388 /// # Example
389 /// ```
390 /// # use parquet_variant::{Variant, VariantMetadata};
391 /// let metadata = [0x01, 0x00, 0x00];
392 /// let value = [0x09, 0x48, 0x49];
393 /// // parse the header metadata first
394 /// let metadata = VariantMetadata::new(&metadata);
395 /// assert_eq!(
396 /// Variant::from("HI"),
397 /// Variant::try_new_with_metadata(metadata, &value).unwrap()
398 /// );
399 /// ```
400 ///
401 /// [validated]: Self#Validation
402 pub fn try_new_with_metadata(
403 metadata: VariantMetadata<'m>,
404 value: &'v [u8],
405 ) -> Result<Self, ArrowError> {
406 Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
407 }
408
409 /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
410 ///
411 /// [unvalidated]: Self#Validation
412 pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
413 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
414 .expect("Invalid variant")
415 }
416
417 // The actual constructor, which only performs shallow (constant-time) validation.
418 fn try_new_with_metadata_and_shallow_validation(
419 metadata: VariantMetadata<'m>,
420 value: &'v [u8],
421 ) -> Result<Self, ArrowError> {
422 let value_metadata = first_byte_from_slice(value)?;
423 let value_data = slice_from_slice(value, 1..)?;
424 let new_self = match get_basic_type(value_metadata) {
425 VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
426 VariantPrimitiveType::Null => Variant::Null,
427 VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
428 VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
429 VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
430 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
431 VariantPrimitiveType::Decimal4 => {
432 let (integer, scale) = decoder::decode_decimal4(value_data)?;
433 Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
434 }
435 VariantPrimitiveType::Decimal8 => {
436 let (integer, scale) = decoder::decode_decimal8(value_data)?;
437 Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
438 }
439 VariantPrimitiveType::Decimal16 => {
440 let (integer, scale) = decoder::decode_decimal16(value_data)?;
441 Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
442 }
443 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
444 VariantPrimitiveType::Double => {
445 Variant::Double(decoder::decode_double(value_data)?)
446 }
447 VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
448 VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
449 VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
450 VariantPrimitiveType::TimestampMicros => {
451 Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
452 }
453 VariantPrimitiveType::TimestampNtzMicros => {
454 Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
455 }
456 VariantPrimitiveType::TimestampNanos => {
457 Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
458 }
459 VariantPrimitiveType::TimestampNtzNanos => {
460 Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
461 }
462 VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
463 VariantPrimitiveType::Binary => {
464 Variant::Binary(decoder::decode_binary(value_data)?)
465 }
466 VariantPrimitiveType::String => {
467 Variant::String(decoder::decode_long_string(value_data)?)
468 }
469 VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
470 },
471 VariantBasicType::ShortString => {
472 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
473 }
474 VariantBasicType::Object => Variant::Object(
475 VariantObject::try_new_with_shallow_validation(metadata, value)?,
476 ),
477 VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
478 metadata, value,
479 )?),
480 };
481 Ok(new_self)
482 }
483
484 /// True if this variant instance has already been [validated].
485 ///
486 /// [validated]: Self#Validation
487 pub fn is_fully_validated(&self) -> bool {
488 match self {
489 Variant::List(list) => list.is_fully_validated(),
490 Variant::Object(obj) => obj.is_fully_validated(),
491 _ => true,
492 }
493 }
494
495 /// Recursively validates this variant value, ensuring that infallible access will not panic due
496 /// to invalid bytes.
497 ///
498 /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
499 /// constructed in unvalidated (and potentially invalid) state.
500 ///
501 /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
502 /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
503 ///
504 /// [objects]: VariantObject#Validation
505 /// [arrays]: VariantList#Validation
506 pub fn with_full_validation(self) -> Result<Self, ArrowError> {
507 use Variant::*;
508 match self {
509 List(list) => list.with_full_validation().map(List),
510 Object(obj) => obj.with_full_validation().map(Object),
511 _ => Ok(self),
512 }
513 }
514
515 /// Converts this variant to `()` if it is null.
516 ///
517 /// Returns `Some(())` for null variants,
518 /// `None` for non-null variants.
519 ///
520 /// # Examples
521 ///
522 /// ```
523 /// use parquet_variant::Variant;
524 ///
525 /// // you can extract `()` from a null variant
526 /// let v1 = Variant::from(());
527 /// assert_eq!(v1.as_null(), Some(()));
528 ///
529 /// // but not from other variants
530 /// let v2 = Variant::from("hello!");
531 /// assert_eq!(v2.as_null(), None);
532 /// ```
533 pub fn as_null(&self) -> Option<()> {
534 matches!(self, Variant::Null).then_some(())
535 }
536
537 /// Converts this variant to a `bool` if possible.
538 ///
539 /// Returns `Some(bool)` for boolean, numeric and string variants,
540 /// `None` for non-boolean variants.
541 ///
542 /// # Examples
543 ///
544 /// ```
545 /// use parquet_variant::Variant;
546 ///
547 /// // you can extract a bool from the true variant
548 /// let v1 = Variant::from(true);
549 /// assert_eq!(v1.as_boolean(), Some(true));
550 ///
551 /// // and the false variant
552 /// let v2 = Variant::from(false);
553 /// assert_eq!(v2.as_boolean(), Some(false));
554 ///
555 /// // and a numeric variant
556 /// let v3 = Variant::from(3);
557 /// assert_eq!(v3.as_boolean(), Some(true));
558 ///
559 /// // and a string variant
560 /// let v4 = Variant::from("true");
561 /// assert_eq!(v4.as_boolean(), Some(true));
562 ///
563 /// // but not from other variants
564 /// let v5 = Variant::from("hello!");
565 /// assert_eq!(v5.as_boolean(), None);
566 /// ```
567 pub fn as_boolean(&self) -> Option<bool> {
568 match self {
569 Variant::BooleanTrue => Some(true),
570 Variant::BooleanFalse => Some(false),
571 Variant::Int8(i) => Some(cast_num_to_bool(*i)),
572 Variant::Int16(i) => Some(cast_num_to_bool(*i)),
573 Variant::Int32(i) => Some(cast_num_to_bool(*i)),
574 Variant::Int64(i) => Some(cast_num_to_bool(*i)),
575 Variant::Float(f) => Some(cast_num_to_bool(*f)),
576 Variant::Double(d) => Some(cast_num_to_bool(*d)),
577 Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()),
578 Variant::String(s) => cast_single_string_to_boolean_default(s),
579 _ => None,
580 }
581 }
582
583 /// Converts this variant to a `NaiveDate` if possible.
584 ///
585 /// Returns `Some(NaiveDate)` for date variants,
586 /// `None` for non-date variants.
587 ///
588 /// # Examples
589 ///
590 /// ```
591 /// use parquet_variant::Variant;
592 /// use chrono::NaiveDate;
593 ///
594 /// // you can extract a NaiveDate from a date variant
595 /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
596 /// let v1 = Variant::from(date);
597 /// assert_eq!(v1.as_naive_date(), Some(date));
598 ///
599 /// // but not from other variants
600 /// let v2 = Variant::from("hello!");
601 /// assert_eq!(v2.as_naive_date(), None);
602 /// ```
603 pub fn as_naive_date(&self) -> Option<NaiveDate> {
604 if let Variant::Date(d) = self {
605 Some(*d)
606 } else {
607 None
608 }
609 }
610
611 /// Converts this variant to a `DateTime<Utc>` if possible.
612 ///
613 /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
614 /// `None` for other variants.
615 ///
616 /// # Examples
617 ///
618 /// ```
619 /// use parquet_variant::Variant;
620 /// use chrono::NaiveDate;
621 ///
622 /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
623 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
624 /// .unwrap()
625 /// .and_hms_milli_opt(12, 34, 56, 780)
626 /// .unwrap()
627 /// .and_utc();
628 /// let v1 = Variant::from(datetime);
629 /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
630 ///
631 /// // but not for other variants.
632 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
633 /// .unwrap()
634 /// .and_hms_nano_opt(12, 33, 54, 123456789)
635 /// .unwrap()
636 /// .and_utc();
637 /// let v2 = Variant::from(datetime_nanos);
638 /// assert_eq!(v2.as_timestamp_micros(), None);
639 /// ```
640 pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
641 match *self {
642 Variant::TimestampMicros(d) => Some(d),
643 _ => None,
644 }
645 }
646
647 /// Converts this variant to a `NaiveDateTime` if possible.
648 ///
649 /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
650 /// `None` for other variants.
651 ///
652 /// # Examples
653 ///
654 /// ```
655 /// use parquet_variant::Variant;
656 /// use chrono::NaiveDate;
657 ///
658 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
659 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
660 /// .unwrap()
661 /// .and_hms_milli_opt(12, 34, 56, 780)
662 /// .unwrap();
663 /// let v1 = Variant::from(datetime);
664 /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
665 ///
666 /// // but not for other variants.
667 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
668 /// .unwrap()
669 /// .and_hms_nano_opt(12, 33, 54, 123456789)
670 /// .unwrap();
671 /// let v2 = Variant::from(datetime_nanos);
672 /// assert_eq!(v2.as_timestamp_micros(), None);
673 /// ```
674 pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
675 match *self {
676 Variant::TimestampNtzMicros(d) => Some(d),
677 _ => None,
678 }
679 }
680
681 /// Converts this variant to a `DateTime<Utc>` if possible.
682 ///
683 /// Returns `Some(DateTime<Utc>)` for timestamp variants,
684 /// `None` for other variants.
685 ///
686 /// # Examples
687 ///
688 /// ```
689 /// use parquet_variant::Variant;
690 /// use chrono::NaiveDate;
691 ///
692 /// // you can extract a DateTime<Utc> from a UTC-adjusted nanosecond-precision variant
693 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
694 /// .unwrap()
695 /// .and_hms_nano_opt(12, 34, 56, 789123456)
696 /// .unwrap()
697 /// .and_utc();
698 /// let v1 = Variant::from(datetime);
699 /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
700 ///
701 /// // or from UTC-adjusted microsecond-precision variant
702 /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
703 /// .unwrap()
704 /// .and_hms_milli_opt(12, 33, 54, 123)
705 /// .unwrap()
706 /// .and_utc();
707 /// // this will convert to `Variant::TimestampMicros`.
708 /// let v2 = Variant::from(datetime_micros);
709 /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
710 ///
711 /// // but not for other variants.
712 /// let v3 = Variant::from("hello!");
713 /// assert_eq!(v3.as_timestamp_nanos(), None);
714 /// ```
715 pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
716 match *self {
717 Variant::TimestampNanos(d) | Variant::TimestampMicros(d) => Some(d),
718 _ => None,
719 }
720 }
721
722 /// Converts this variant to a `NaiveDateTime` if possible.
723 ///
724 /// Returns `Some(NaiveDateTime)` for timestamp variants,
725 /// `None` for other variants.
726 ///
727 /// # Examples
728 ///
729 /// ```
730 /// use parquet_variant::Variant;
731 /// use chrono::NaiveDate;
732 ///
733 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
734 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
735 /// .unwrap()
736 /// .and_hms_nano_opt(12, 34, 56, 789123456)
737 /// .unwrap();
738 /// let v1 = Variant::from(datetime);
739 /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
740 ///
741 /// // or from a microsecond-precision non-UTC-adjusted variant
742 /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
743 /// .unwrap()
744 /// .and_hms_milli_opt(12, 33, 54, 123)
745 /// .unwrap();
746 /// // this will convert to `Variant::TimestampMicros`.
747 /// let v2 = Variant::from(datetime_micros);
748 /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
749 ///
750 /// // but not for other variants.
751 /// let v3 = Variant::from("hello!");
752 /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
753 /// ```
754 pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
755 match *self {
756 Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) => Some(d),
757 _ => None,
758 }
759 }
760
761 /// Converts this variant to a `&[u8]` if possible.
762 ///
763 /// Returns `Some(&[u8])` for binary variants,
764 /// `None` for non-binary variants.
765 ///
766 /// # Examples
767 ///
768 /// ```
769 /// use parquet_variant::Variant;
770 ///
771 /// // you can extract a byte slice from a binary variant
772 /// let data = b"hello!";
773 /// let v1 = Variant::Binary(data);
774 /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
775 ///
776 /// // but not from other variant types
777 /// let v2 = Variant::from(123i64);
778 /// assert_eq!(v2.as_u8_slice(), None);
779 /// ```
780 pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
781 if let Variant::Binary(d) = self {
782 Some(d)
783 } else {
784 None
785 }
786 }
787
788 /// Converts this variant to a `&str` if possible.
789 ///
790 /// Returns `Some(&str)` for string variants (both regular and short strings),
791 /// `None` for non-string variants.
792 ///
793 /// # Examples
794 ///
795 /// ```
796 /// use parquet_variant::Variant;
797 ///
798 /// // you can extract a string from string variants
799 /// let s = "hello!";
800 /// let v1 = Variant::from(s);
801 /// assert_eq!(v1.as_string(), Some(s));
802 ///
803 /// // but not from other variants
804 /// let v2 = Variant::from(123i64);
805 /// assert_eq!(v2.as_string(), None);
806 /// ```
807 pub fn as_string(&'v self) -> Option<&'v str> {
808 match self {
809 Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
810 _ => None,
811 }
812 }
813
814 /// Converts this variant to a `uuid hyphenated string` if possible.
815 ///
816 /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
817 ///
818 /// # Examples
819 ///
820 /// ```
821 /// use parquet_variant::Variant;
822 ///
823 /// // You can extract a UUID from a UUID variant
824 /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
825 /// let v1 = Variant::Uuid(s);
826 /// assert_eq!(s, v1.as_uuid().unwrap());
827 /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
828 ///
829 /// //but not from other variants
830 /// let v2 = Variant::from(1234);
831 /// assert_eq!(None, v2.as_uuid())
832 /// ```
833 pub fn as_uuid(&self) -> Option<Uuid> {
834 match self {
835 Variant::Uuid(u) => Some(*u),
836 _ => None,
837 }
838 }
839
840 fn cast_decimal_to_num<D, T, F>(raw: D::Native, scale: u8, as_float: F) -> Option<T>
841 where
842 D: DecimalType,
843 D::Native: NumCast + ArrowNativeTypeOp,
844 T: DecimalCastTarget,
845 F: Fn(D::Native) -> f64,
846 {
847 let base: D::Native = NumCast::from(10)?;
848
849 let div = base.pow_checked(<u32 as From<u8>>::from(scale)).ok()?;
850 match T::KIND {
851 NumericKind::Integer => raw
852 .div_checked(div)
853 .ok()
854 .and_then(<T as NumCast>::from::<D::Native>),
855 NumericKind::Float => T::from(single_decimal_to_float_lossy::<D, _>(
856 &as_float,
857 raw,
858 <i32 as From<u8>>::from(scale),
859 )),
860 }
861 }
862
863 /// Converts a boolean or numeric variant(integers, floating-point, and decimals)
864 /// to the specified numeric type `T`.
865 ///
866 /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if
867 /// the conversion succeeds, `None` if the variant can't be casted to type `T`.
868 fn as_num<T>(&self) -> Option<T>
869 where
870 T: DecimalCastTarget,
871 {
872 match *self {
873 Variant::BooleanFalse => single_bool_to_numeric(false),
874 Variant::BooleanTrue => single_bool_to_numeric(true),
875 Variant::Int8(i) => num_cast(i),
876 Variant::Int16(i) => num_cast(i),
877 Variant::Int32(i) => num_cast(i),
878 Variant::Int64(i) => num_cast(i),
879 Variant::Float(f) => num_cast(f),
880 Variant::Double(d) => num_cast(d),
881 Variant::Decimal4(d) => {
882 Self::cast_decimal_to_num::<Decimal32Type, T, _>(d.integer(), d.scale(), |x| {
883 x as f64
884 })
885 }
886 Variant::Decimal8(d) => {
887 Self::cast_decimal_to_num::<Decimal64Type, T, _>(d.integer(), d.scale(), |x| {
888 x as f64
889 })
890 }
891 Variant::Decimal16(d) => {
892 Self::cast_decimal_to_num::<Decimal128Type, T, _>(d.integer(), d.scale(), |x| {
893 x as f64
894 })
895 }
896 _ => None,
897 }
898 }
899
900 /// Converts this variant to an `i8` if possible.
901 ///
902 /// Returns `Some(i8)` for boolean and numeric variants(integers, floating-point,
903 /// and decimals with scale 0) that fit in `i8` range,
904 /// `None` for other variants or values that would overflow.
905 ///
906 /// # Examples
907 ///
908 /// ```
909 /// use parquet_variant::Variant;
910 ///
911 /// // you can read an int64 variant into an i8 if it fits
912 /// let v1 = Variant::from(123i64);
913 /// assert_eq!(v1.as_int8(), Some(123i8));
914 ///
915 /// // or from boolean variant
916 /// let v2 = Variant::BooleanFalse;
917 /// assert_eq!(v2.as_int8(), Some(0));
918 ///
919 /// // but not if it would overflow
920 /// let v3 = Variant::from(1234i64);
921 /// assert_eq!(v3.as_int8(), None);
922 ///
923 /// // or if the variant cannot be cast into an integer
924 /// let v4 = Variant::from("hello!");
925 /// assert_eq!(v4.as_int8(), None);
926 /// ```
927 pub fn as_int8(&self) -> Option<i8> {
928 self.as_num()
929 }
930
931 /// Converts this variant to an `i16` if possible.
932 ///
933 /// Returns `Some(i16)` for boolean and numeric variants(integers, floating-point,
934 /// and decimals with scale 0) that fit in `i16` range
935 /// `None` for other variants or values that would overflow.
936 ///
937 /// # Examples
938 ///
939 /// ```
940 /// use parquet_variant::Variant;
941 ///
942 /// // you can read an int64 variant into an i16 if it fits
943 /// let v1 = Variant::from(123i64);
944 /// assert_eq!(v1.as_int16(), Some(123i16));
945 ///
946 /// // or from boolean variant
947 /// let v2 = Variant::BooleanFalse;
948 /// assert_eq!(v2.as_int16(), Some(0));
949 ///
950 /// // but not if it would overflow
951 /// let v3 = Variant::from(123456i64);
952 /// assert_eq!(v3.as_int16(), None);
953 ///
954 /// // or if the variant cannot be cast into an integer
955 /// let v4 = Variant::from("hello!");
956 /// assert_eq!(v4.as_int16(), None);
957 /// ```
958 pub fn as_int16(&self) -> Option<i16> {
959 self.as_num()
960 }
961
962 /// Converts this variant to an `i32` if possible.
963 ///
964 /// Returns `Some(i32)` for boolean and numeric variants(integers, floating-point,
965 /// and decimals with scale 0) that fit in `i32` range
966 /// `None` for other variants or values that would overflow.
967 ///
968 /// # Examples
969 ///
970 /// ```
971 /// use parquet_variant::Variant;
972 ///
973 /// // you can read an int64 variant into an i32 if it fits
974 /// let v1 = Variant::from(123i64);
975 /// assert_eq!(v1.as_int32(), Some(123i32));
976 ///
977 /// // or from boolean variant
978 /// let v2 = Variant::BooleanFalse;
979 /// assert_eq!(v2.as_int32(), Some(0));
980 ///
981 /// // but not if it would overflow
982 /// let v3 = Variant::from(12345678901i64);
983 /// assert_eq!(v3.as_int32(), None);
984 ///
985 /// // or if the variant cannot be cast into an integer
986 /// let v4 = Variant::from("hello!");
987 /// assert_eq!(v4.as_int32(), None);
988 /// ```
989 pub fn as_int32(&self) -> Option<i32> {
990 self.as_num()
991 }
992
993 /// Converts this variant to an `i64` if possible.
994 ///
995 /// Returns `Some(i64)` for boolean and numeric variants(integers, floating-point,
996 /// and decimals with scale 0) that fit in `i64` range
997 /// `None` for other variants or values that would overflow.
998 ///
999 /// # Examples
1000 ///
1001 /// ```
1002 /// use parquet_variant::Variant;
1003 ///
1004 /// // you can read an int64 variant into an i64
1005 /// let v1 = Variant::from(123i64);
1006 /// assert_eq!(v1.as_int64(), Some(123i64));
1007 ///
1008 /// // or from boolean variant
1009 /// let v2 = Variant::BooleanFalse;
1010 /// assert_eq!(v2.as_int64(), Some(0));
1011 ///
1012 /// // but not a variant that cannot be cast into an integer
1013 /// let v3 = Variant::from("hello!");
1014 /// assert_eq!(v3.as_int64(), None);
1015 /// ```
1016 pub fn as_int64(&self) -> Option<i64> {
1017 self.as_num()
1018 }
1019
1020 /// Converts this variant to a `u8` if possible.
1021 ///
1022 /// Returns `Some(u8)` for boolean and numeric variants(integers, floating-point,
1023 /// and decimals with scale 0) that fit in `u8` range
1024 /// `None` for other variants or values that would overflow.
1025 ///
1026 /// # Examples
1027 ///
1028 /// ```
1029 /// use parquet_variant::{Variant, VariantDecimal4};
1030 ///
1031 /// // you can read an int64 variant into an u8
1032 /// let v1 = Variant::from(123i64);
1033 /// assert_eq!(v1.as_u8(), Some(123u8));
1034 ///
1035 /// // or a Decimal4 with scale 0 into u8
1036 /// let d = VariantDecimal4::try_new(26, 0).unwrap();
1037 /// let v2 = Variant::from(d);
1038 /// assert_eq!(v2.as_u8(), Some(26u8));
1039 ///
1040 /// // or a variant that decimal with scale not equal to zero
1041 /// let d = VariantDecimal4::try_new(123, 2).unwrap();
1042 /// let v3 = Variant::from(d);
1043 /// assert_eq!(v3.as_u8(), Some(1));
1044 ///
1045 /// // or from boolean variant
1046 /// let v4 = Variant::BooleanFalse;
1047 /// assert_eq!(v4.as_u8(), Some(0));
1048 ///
1049 /// // but not a variant that can't fit into the range
1050 /// let v5 = Variant::from(-1);
1051 /// assert_eq!(v5.as_u8(), None);
1052 ///
1053 /// // or not a variant that cannot be cast into an integer
1054 /// let v6 = Variant::from("hello!");
1055 /// assert_eq!(v6.as_u8(), None);
1056 /// ```
1057 pub fn as_u8(&self) -> Option<u8> {
1058 self.as_num()
1059 }
1060
1061 /// Converts this variant to an `u16` if possible.
1062 ///
1063 /// Returns `Some(u16)` for boolean and numeric variants(integers, floating-point,
1064 /// and decimals with scale 0) that fit in `u16` range
1065 /// `None` for other variants or values that would overflow.
1066 ///
1067 /// # Examples
1068 ///
1069 /// ```
1070 /// use parquet_variant::{Variant, VariantDecimal4};
1071 ///
1072 /// // you can read an int64 variant into an u16
1073 /// let v1 = Variant::from(123i64);
1074 /// assert_eq!(v1.as_u16(), Some(123u16));
1075 ///
    /// // or a Decimal4 with scale 0 into u16
1077 /// let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
1078 /// let v2 = Variant::from(d);
1079 /// assert_eq!(v2.as_u16(), Some(u16::MAX));
1080 ///
1081 /// // or a variant that decimal with scale not equal to zero
1082 /// let d = VariantDecimal4::try_new(123, 2).unwrap();
1083 /// let v3 = Variant::from(d);
1084 /// assert_eq!(v3.as_u16(), Some(1));
1085 ///
1086 /// // or from boolean variant
1087 /// let v4= Variant::BooleanFalse;
1088 /// assert_eq!(v4.as_u16(), Some(0));
1089 ///
1090 /// // but not a variant that can't fit into the range
1091 /// let v5 = Variant::from(-1);
1092 /// assert_eq!(v5.as_u16(), None);
1093 ///
1094 /// // or not a variant that cannot be cast into an integer
1095 /// let v6 = Variant::from("hello!");
1096 /// assert_eq!(v6.as_u16(), None);
1097 /// ```
1098 pub fn as_u16(&self) -> Option<u16> {
1099 self.as_num()
1100 }
1101
1102 /// Converts this variant to an `u32` if possible.
1103 ///
1104 /// Returns `Some(u32)` for boolean and numeric variants(integers, floating-point,
1105 /// and decimals with scale 0) that fit in `u32` range
1106 /// `None` for other variants or values that would overflow.
1107 ///
1108 /// # Examples
1109 ///
1110 /// ```
1111 /// use parquet_variant::{Variant, VariantDecimal8};
1112 ///
1113 /// // you can read an int64 variant into an u32
1114 /// let v1 = Variant::from(123i64);
1115 /// assert_eq!(v1.as_u32(), Some(123u32));
1116 ///
    /// // or a Decimal8 with scale 0 into u32
1118 /// let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
1119 /// let v2 = Variant::from(d);
1120 /// assert_eq!(v2.as_u32(), Some(u32::MAX));
1121 ///
1122 /// // or a variant that decimal with scale not equal to zero
1123 /// let d = VariantDecimal8::try_new(123, 2).unwrap();
1124 /// let v3 = Variant::from(d);
1125 /// assert_eq!(v3.as_u32(), Some(1));
1126 ///
1127 /// // or from boolean variant
1128 /// let v4 = Variant::BooleanFalse;
1129 /// assert_eq!(v4.as_u32(), Some(0));
1130 ///
1131 /// // but not a variant that can't fit into the range
1132 /// let v5 = Variant::from(-1);
1133 /// assert_eq!(v5.as_u32(), None);
1134 ///
1135 /// // or not a variant that cannot be cast into an integer
1136 /// let v6 = Variant::from("hello!");
1137 /// assert_eq!(v6.as_u32(), None);
1138 /// ```
1139 pub fn as_u32(&self) -> Option<u32> {
1140 self.as_num()
1141 }
1142
1143 /// Converts this variant to an `u64` if possible.
1144 ///
1145 /// Returns `Some(u64)` for boolean and numeric variants(integers, floating-point,
1146 /// and decimals with scale 0) that fit in `u64` range
1147 /// `None` for other variants or values that would overflow.
1148 ///
1149 /// # Examples
1150 ///
1151 /// ```
1152 /// use parquet_variant::{Variant, VariantDecimal16};
1153 ///
1154 /// // you can read an int64 variant into an u64
1155 /// let v1 = Variant::from(123i64);
1156 /// assert_eq!(v1.as_u64(), Some(123u64));
1157 ///
    /// // or a Decimal16 with scale 0 into u64
1159 /// let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
1160 /// let v2 = Variant::from(d);
1161 /// assert_eq!(v2.as_u64(), Some(u64::MAX));
1162 ///
1163 /// // or a variant that decimal with scale not equal to zero
1164 /// let d = VariantDecimal16::try_new(123, 2).unwrap();
1165 /// let v3 = Variant::from(d);
1166 /// assert_eq!(v3.as_u64(), Some(1));
1167 ///
1168 /// // or from boolean variant
1169 /// let v4 = Variant::BooleanFalse;
1170 /// assert_eq!(v4.as_u64(), Some(0));
1171 ///
1172 /// // but not a variant that can't fit into the range
1173 /// let v5 = Variant::from(-1);
1174 /// assert_eq!(v5.as_u64(), None);
1175 ///
1176 /// // or not a variant that cannot be cast into an integer
1177 /// let v6 = Variant::from("hello!");
1178 /// assert_eq!(v6.as_u64(), None);
1179 /// ```
1180 pub fn as_u64(&self) -> Option<u64> {
1181 self.as_num()
1182 }
1183
1184 fn convert_string_to_decimal<D, VD>(input: &str) -> Option<VD>
1185 where
1186 D: DecimalType,
1187 VD: VariantDecimalType<Native = D::Native>,
1188 D::Native: NumCast + DecimalCast,
1189 {
1190 // find the last '.'
1191 let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len());
1192
1193 let scale = u8::try_from(scale_usize).ok()?;
1194
1195 let raw = parse_string_to_decimal_native::<D>(input, scale_usize).ok()?;
1196 VD::try_new(raw, scale).ok()
1197 }
1198
    /// Converts this variant to a [`VariantDecimal4`] (4-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal4)` for integer, floating-point, string and decimal
    /// variants whose value can be represented as a `VariantDecimal4`,
    /// `None` for other variants or values that would overflow.
1204 ///
1205 /// # Examples
1206 ///
1207 /// ```
1208 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
1209 ///
1210 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1211 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1212 /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1213 ///
1214 /// // and from larger decimal variants if they fit
1215 /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
1216 /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1217 ///
1218 /// // or from string variants if they can be parsed as decimals
1219 /// let v3 = Variant::from("123.45");
1220 /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok());
1221 ///
1222 /// // but not if the value would overflow i32
1223 /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
1224 /// assert_eq!(v4.as_decimal4(), None);
1225 ///
1226 /// // or if the variant is not a decimal
1227 /// let v5 = Variant::from("hello!");
1228 /// assert_eq!(v5.as_decimal4(), None);
1229 /// ```
1230 pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
1231 match *self {
1232 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1233 self.as_num::<i32>().and_then(|x| x.try_into().ok())
1234 }
1235 Variant::Float(f) => single_float_to_decimal::<Decimal32Type>(f as _, 1f64)
1236 .and_then(|x: i32| x.try_into().ok()),
1237 Variant::Double(f) => single_float_to_decimal::<Decimal32Type>(f, 1f64)
1238 .and_then(|x: i32| x.try_into().ok()),
1239 Variant::String(v) => Self::convert_string_to_decimal::<Decimal32Type, _>(v),
1240 Variant::ShortString(v) => {
1241 Self::convert_string_to_decimal::<Decimal32Type, _>(v.as_str())
1242 }
1243 Variant::Decimal4(decimal4) => Some(decimal4),
1244 Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
1245 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1246 _ => None,
1247 }
1248 }
1249
    /// Converts this variant to a [`VariantDecimal8`] (8-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal8)` for integer, floating-point, string and decimal
    /// variants whose value can be represented as a `VariantDecimal8`. For string
    /// variants the scale is the number of digits after the decimal point.
    /// `None` for other variants or values that would overflow.
1255 ///
1256 /// # Examples
1257 ///
1258 /// ```
1259 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
1260 ///
1261 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1262 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1263 /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1264 ///
1265 /// // and from larger decimal variants if they fit
1266 /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
1267 /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1268 ///
1269 /// // or from string variants if they can be parsed as decimals
1270 /// let v3 = Variant::from("123.45");
1271 /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok());
1272 ///
1273 /// // but not if the value would overflow i64
1274 /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
1275 /// assert_eq!(v4.as_decimal8(), None);
1276 ///
1277 /// // or if the variant is not a decimal
1278 /// let v5 = Variant::from("hello!");
1279 /// assert_eq!(v5.as_decimal8(), None);
1280 /// ```
1281 pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
1282 match *self {
1283 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1284 self.as_num::<i64>().and_then(|x| x.try_into().ok())
1285 }
1286 Variant::Float(f) => single_float_to_decimal::<Decimal64Type>(f as _, 1f64)
1287 .and_then(|x: i64| x.try_into().ok()),
1288 Variant::Double(f) => single_float_to_decimal::<Decimal64Type>(f, 1f64)
1289 .and_then(|x: i64| x.try_into().ok()),
1290 Variant::String(v) => Self::convert_string_to_decimal::<Decimal64Type, _>(v),
1291 Variant::ShortString(v) => {
1292 Self::convert_string_to_decimal::<Decimal64Type, _>(v.as_str())
1293 }
1294 Variant::Decimal4(decimal4) => Some(decimal4.into()),
1295 Variant::Decimal8(decimal8) => Some(decimal8),
1296 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1297 _ => None,
1298 }
1299 }
1300
    /// Converts this variant to a [`VariantDecimal16`] (16-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal16)` for integer, floating-point, string and decimal
    /// variants whose value can be represented as a `VariantDecimal16`. For string
    /// variants the scale is the number of digits after the decimal point.
    /// `None` for other variants or values that would overflow.
1306 ///
1307 /// # Examples
1308 ///
1309 /// ```
1310 /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
1311 ///
1312 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1313 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1314 /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
1315 ///
1316 /// // or from a string variant if it can be parsed as decimal
1317 /// let v2 = Variant::from("123.45");
1318 /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok());
1319 ///
1320 /// // but not if the variant is not a decimal
1321 /// let v3 = Variant::from("hello!");
1322 /// assert_eq!(v3.as_decimal16(), None);
1323 /// ```
1324 pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
1325 match *self {
1326 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1327 let x = self.as_num::<i64>()?;
1328 <i128 as From<i64>>::from(x).try_into().ok()
1329 }
1330 Variant::Float(f) => {
1331 single_float_to_decimal::<Decimal128Type>(<f64 as From<f32>>::from(f), 1f64)
1332 .and_then(|x| x.try_into().ok())
1333 }
1334 Variant::Double(f) => {
1335 single_float_to_decimal::<Decimal128Type>(f, 1f64).and_then(|x| x.try_into().ok())
1336 }
1337 Variant::String(v) => Self::convert_string_to_decimal::<Decimal128Type, _>(v),
1338 Variant::ShortString(v) => {
1339 Self::convert_string_to_decimal::<Decimal128Type, _>(v.as_str())
1340 }
1341 Variant::Decimal4(decimal4) => Some(decimal4.into()),
1342 Variant::Decimal8(decimal8) => Some(decimal8.into()),
1343 Variant::Decimal16(decimal16) => Some(decimal16),
1344 _ => None,
1345 }
1346 }
1347
1348 /// Converts this variant to an `f16` if possible.
1349 ///
1350 /// Returns `Some(f16)` for boolean and numeric variants(integers, floating-point,
1351 /// and decimals with scale 0) that fit in `f16` range
1352 /// `None` otherwise.
1353 ///
1354 /// # Example
1355 ///
1356 /// ```
1357 /// use parquet_variant::Variant;
1358 /// use half::f16;
1359 ///
1360 /// // you can extract an f16 from a float variant
1361 /// let v1 = Variant::from(std::f32::consts::PI);
1362 /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
1363 ///
1364 /// // and from a double variant (with loss of precision to nearest f16)
1365 /// let v2 = Variant::from(std::f64::consts::PI);
1366 /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
1367 ///
1368 /// // and from boolean
1369 /// let v3 = Variant::BooleanTrue;
1370 /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0)));
1371 ///
1372 /// // return inf if overflow
1373 /// let v4 = Variant::from(123456);
1374 /// assert_eq!(v4.as_f16(), Some(f16::INFINITY));
1375 ///
1376 /// // but not from other variants
1377 /// let v5 = Variant::from("hello!");
    /// assert_eq!(v5.as_f16(), None);
    /// ```
1379 pub fn as_f16(&self) -> Option<f16> {
1380 self.as_num()
1381 }
1382
1383 /// Converts this variant to an `f32` if possible.
1384 ///
1385 /// Returns `Some(f32)` for boolean and numeric variants(integers, floating-point,
1386 /// and decimals with scale 0) that fit in `f32` range
1387 /// `None` otherwise.
1388 ///
1389 /// # Examples
1390 ///
1391 /// ```
1392 /// use parquet_variant::Variant;
1393 ///
1394 /// // you can extract an f32 from a float variant
1395 /// let v1 = Variant::from(std::f32::consts::PI);
1396 /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
1397 ///
1398 /// // and from a double variant (with loss of precision to nearest f32)
1399 /// let v2 = Variant::from(std::f64::consts::PI);
1400 /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
1401 ///
1402 /// // and from boolean variant
1403 /// let v3 = Variant::BooleanTrue;
1404 /// assert_eq!(v3.as_f32(), Some(1.0));
1405 ///
1406 /// // and return inf if overflow
1407 /// let v4 = Variant::from(f64::MAX);
1408 /// assert_eq!(v4.as_f32(), Some(f32::INFINITY));
1409 ///
1410 /// // but not from other variants
1411 /// let v5 = Variant::from("hello!");
1412 /// assert_eq!(v5.as_f32(), None);
1413 /// ```
1414 pub fn as_f32(&self) -> Option<f32> {
1415 self.as_num()
1416 }
1417
1418 /// Converts this variant to an `f64` if possible.
1419 ///
1420 /// Returns `Some(f64)` for boolean and numeric variants(integers, floating-point,
1421 /// and decimals with scale 0) that fit in `f64` range
1422 /// `None` for other variants or can't be represented by an f64.
1423 ///
1424 /// # Examples
1425 ///
1426 /// ```
1427 /// use parquet_variant::Variant;
1428 ///
1429 /// // you can extract an f64 from a float variant
1430 /// let v1 = Variant::from(std::f32::consts::PI);
1431 /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
1432 ///
1433 /// // and from a double variant
1434 /// let v2 = Variant::from(std::f64::consts::PI);
1435 /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
1436 ///
1437 /// // and from boolean variant
1438 /// let v3 = Variant::BooleanTrue;
1439 /// assert_eq!(v3.as_f64(), Some(1.0f64));
1440 ///
1441 /// // but not from other variants
1442 /// let v5 = Variant::from("hello!");
1443 /// assert_eq!(v5.as_f64(), None);
1444 /// ```
1445 pub fn as_f64(&self) -> Option<f64> {
1446 self.as_num()
1447 }
1448
1449 /// Converts this variant to an `Object` if it is an [`VariantObject`].
1450 ///
1451 /// Returns `Some(&VariantObject)` for object variants,
1452 /// `None` for non-object variants.
1453 ///
1454 /// See [`Self::get_path`] to dynamically traverse objects
1455 ///
1456 /// # Examples
1457 /// ```
1458 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1459 /// # let (metadata, value) = {
1460 /// # let mut builder = VariantBuilder::new();
1461 /// # let mut obj = builder.new_object();
1462 /// # obj.insert("name", "John");
1463 /// # obj.finish();
1464 /// # builder.finish()
1465 /// # };
1466 /// // object that is {"name": "John"}
1467 /// let variant = Variant::new(&metadata, &value);
1468 /// // use the `as_object` method to access the object
1469 /// let obj = variant.as_object().expect("variant should be an object");
1470 /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1471 /// ```
1472 pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1473 if let Variant::Object(obj) = self {
1474 Some(obj)
1475 } else {
1476 None
1477 }
1478 }
1479
1480 /// If this is an object and the requested field name exists, retrieves the corresponding field
1481 /// value. Otherwise, returns None.
1482 ///
1483 /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1484 ///
1485 /// # Examples
1486 /// ```
1487 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1488 /// # let mut builder = VariantBuilder::new();
1489 /// # let mut obj = builder.new_object();
1490 /// # obj.insert("name", "John");
1491 /// # obj.finish();
1492 /// # let (metadata, value) = builder.finish();
1493 /// // object that is {"name": "John"}
1494 /// let variant = Variant::new(&metadata, &value);
1495 /// // use the `get_object_field` method to access the object
1496 /// let obj = variant.get_object_field("name");
1497 /// assert_eq!(obj, Some(Variant::from("John")));
1498 /// let obj = variant.get_object_field("foo");
1499 /// assert!(obj.is_none());
1500 /// ```
1501 pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1502 match self {
1503 Variant::Object(object) => object.get(field_name),
1504 _ => None,
1505 }
1506 }
1507
1508 /// Converts this variant to a `List` if it is a [`VariantList`].
1509 ///
1510 /// Returns `Some(&VariantList)` for list variants,
1511 /// `None` for non-list variants.
1512 ///
1513 /// See [`Self::get_path`] to dynamically traverse lists
1514 ///
1515 /// # Examples
1516 /// ```
1517 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1518 /// # let (metadata, value) = {
1519 /// # let mut builder = VariantBuilder::new();
1520 /// # let mut list = builder.new_list();
1521 /// # list.append_value("John");
1522 /// # list.append_value("Doe");
1523 /// # list.finish();
1524 /// # builder.finish()
1525 /// # };
1526 /// // list that is ["John", "Doe"]
1527 /// let variant = Variant::new(&metadata, &value);
1528 /// // use the `as_list` method to access the list
1529 /// let list = variant.as_list().expect("variant should be a list");
1530 /// assert_eq!(list.len(), 2);
1531 /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1532 /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1533 /// ```
1534 pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1535 if let Variant::List(list) = self {
1536 Some(list)
1537 } else {
1538 None
1539 }
1540 }
1541
1542 /// Converts this variant to a `NaiveTime` if possible.
1543 ///
1544 /// Returns `Some(NaiveTime)` for `Variant::Time`,
1545 /// `None` for non-Time variants.
1546 ///
1547 /// # Example
1548 ///
1549 /// ```
1550 /// use chrono::NaiveTime;
1551 /// use parquet_variant::Variant;
1552 ///
1553 /// // you can extract a `NaiveTime` from a `Variant::Time`
1554 /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1555 /// let v1 = Variant::from(time);
1556 /// assert_eq!(Some(time), v1.as_time_utc());
1557 ///
1558 /// // but not from other variants.
1559 /// let v2 = Variant::from("Hello");
1560 /// assert_eq!(None, v2.as_time_utc());
1561 /// ```
1562 pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1563 if let Variant::Time(time) = self {
1564 Some(*time)
1565 } else {
1566 None
1567 }
1568 }
1569
1570 /// If this is a list and the requested index is in bounds, retrieves the corresponding
1571 /// element. Otherwise, returns None.
1572 ///
1573 /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1574 ///
1575 /// # Examples
1576 /// ```
1577 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1578 /// # let mut builder = VariantBuilder::new();
1579 /// # let mut list = builder.new_list();
1580 /// # list.append_value("John");
1581 /// # list.append_value("Doe");
1582 /// # list.finish();
1583 /// # let (metadata, value) = builder.finish();
1584 /// // list that is ["John", "Doe"]
1585 /// let variant = Variant::new(&metadata, &value);
1586 /// // use the `get_list_element` method to access the list
1587 /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1588 /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1589 /// assert!(variant.get_list_element(2).is_none());
1590 /// ```
1591 pub fn get_list_element(&self, index: usize) -> Option<Self> {
1592 match self {
1593 Variant::List(list) => list.get(index),
1594 _ => None,
1595 }
1596 }
1597
1598 /// Return the metadata dictionary associated with this variant value.
1599 pub fn metadata(&self) -> &VariantMetadata<'m> {
1600 match self {
1601 Variant::Object(VariantObject { metadata, .. })
1602 | Variant::List(VariantList { metadata, .. }) => metadata,
1603 _ => &EMPTY_VARIANT_METADATA,
1604 }
1605 }
1606
1607 /// Return a new Variant with the path followed.
1608 ///
1609 /// If the path is not found, `None` is returned.
1610 ///
1611 /// # Example
1612 /// ```
1613 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1614 /// # let mut builder = VariantBuilder::new();
1615 /// # let mut obj = builder.new_object();
1616 /// # let mut list = obj.new_list("foo");
1617 /// # list.append_value("bar");
1618 /// # list.append_value("baz");
1619 /// # list.finish();
1620 /// # obj.finish();
1621 /// # let (metadata, value) = builder.finish();
1622 /// // given a variant like `{"foo": ["bar", "baz"]}`
1623 /// let variant = Variant::new(&metadata, &value);
1624 /// // Accessing a non existent path returns None
1625 /// assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), None);
1626 /// // Access obj["foo"]
1627 /// let path = VariantPath::try_from("foo").unwrap();
1628 /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1629 /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1630 /// // Access foo[0]
1631 /// let path = VariantPath::from(0);
1632 /// let bar = foo.get_path(&path).expect("element 0 should exist");
1633 /// // bar is a string
1634 /// assert_eq!(bar.as_string(), Some("bar"));
1635 /// // You can also access nested paths
1636 /// let path = VariantPath::try_from("foo").unwrap().join(0);
1637 /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1638 /// ```
1639 pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1640 path.iter()
1641 .try_fold(self.clone(), |output, element| match element {
1642 VariantPathElement::Field { name } => output.get_object_field(name),
1643 VariantPathElement::Index { index } => output.get_list_element(*index),
1644 })
1645 }
1646}
1647
1648impl From<()> for Variant<'_, '_> {
1649 fn from((): ()) -> Self {
1650 Variant::Null
1651 }
1652}
1653
1654impl From<bool> for Variant<'_, '_> {
1655 fn from(value: bool) -> Self {
1656 match value {
1657 true => Variant::BooleanTrue,
1658 false => Variant::BooleanFalse,
1659 }
1660 }
1661}
1662
1663impl From<i8> for Variant<'_, '_> {
1664 fn from(value: i8) -> Self {
1665 Variant::Int8(value)
1666 }
1667}
1668
1669impl From<i16> for Variant<'_, '_> {
1670 fn from(value: i16) -> Self {
1671 Variant::Int16(value)
1672 }
1673}
1674
1675impl From<i32> for Variant<'_, '_> {
1676 fn from(value: i32) -> Self {
1677 Variant::Int32(value)
1678 }
1679}
1680
1681impl From<i64> for Variant<'_, '_> {
1682 fn from(value: i64) -> Self {
1683 Variant::Int64(value)
1684 }
1685}
1686
1687impl From<u8> for Variant<'_, '_> {
1688 fn from(value: u8) -> Self {
1689 // if it fits in i8, use that, otherwise use i16
1690 if let Ok(value) = i8::try_from(value) {
1691 Variant::Int8(value)
1692 } else {
1693 Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible
1694 }
1695 }
1696}
1697
1698impl From<u16> for Variant<'_, '_> {
1699 fn from(value: u16) -> Self {
1700 // if it fits in i16, use that, otherwise use i32
1701 if let Ok(value) = i16::try_from(value) {
1702 Variant::Int16(value)
1703 } else {
1704 Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible
1705 }
1706 }
1707}
1708impl From<u32> for Variant<'_, '_> {
1709 fn from(value: u32) -> Self {
1710 // if it fits in i32, use that, otherwise use i64
1711 if let Ok(value) = i32::try_from(value) {
1712 Variant::Int32(value)
1713 } else {
1714 Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible
1715 }
1716 }
1717}
1718
1719impl From<u64> for Variant<'_, '_> {
1720 fn from(value: u64) -> Self {
1721 // if it fits in i64, use that, otherwise use Decimal16
1722 if let Ok(value) = i64::try_from(value) {
1723 Variant::Int64(value)
1724 } else {
1725 // u64 max is 18446744073709551615, which fits in i128
1726 Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap())
1727 }
1728 }
1729}
1730
1731impl From<VariantDecimal4> for Variant<'_, '_> {
1732 fn from(value: VariantDecimal4) -> Self {
1733 Variant::Decimal4(value)
1734 }
1735}
1736
1737impl From<VariantDecimal8> for Variant<'_, '_> {
1738 fn from(value: VariantDecimal8) -> Self {
1739 Variant::Decimal8(value)
1740 }
1741}
1742
1743impl From<VariantDecimal16> for Variant<'_, '_> {
1744 fn from(value: VariantDecimal16) -> Self {
1745 Variant::Decimal16(value)
1746 }
1747}
1748
1749impl From<half::f16> for Variant<'_, '_> {
1750 fn from(value: half::f16) -> Self {
1751 Variant::Float(value.into())
1752 }
1753}
1754
1755impl From<f32> for Variant<'_, '_> {
1756 fn from(value: f32) -> Self {
1757 Variant::Float(value)
1758 }
1759}
1760
1761impl From<f64> for Variant<'_, '_> {
1762 fn from(value: f64) -> Self {
1763 Variant::Double(value)
1764 }
1765}
1766
1767impl From<NaiveDate> for Variant<'_, '_> {
1768 fn from(value: NaiveDate) -> Self {
1769 Variant::Date(value)
1770 }
1771}
1772
1773impl From<DateTime<Utc>> for Variant<'_, '_> {
1774 fn from(value: DateTime<Utc>) -> Self {
1775 if value.nanosecond() % 1000 > 0 {
1776 Variant::TimestampNanos(value)
1777 } else {
1778 Variant::TimestampMicros(value)
1779 }
1780 }
1781}
1782
1783impl From<NaiveDateTime> for Variant<'_, '_> {
1784 fn from(value: NaiveDateTime) -> Self {
1785 if value.nanosecond() % 1000 > 0 {
1786 Variant::TimestampNtzNanos(value)
1787 } else {
1788 Variant::TimestampNtzMicros(value)
1789 }
1790 }
1791}
1792
1793impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1794 fn from(value: &'v [u8]) -> Self {
1795 Variant::Binary(value)
1796 }
1797}
1798
1799impl From<NaiveTime> for Variant<'_, '_> {
1800 fn from(value: NaiveTime) -> Self {
1801 Variant::Time(value)
1802 }
1803}
1804
1805impl From<Uuid> for Variant<'_, '_> {
1806 fn from(value: Uuid) -> Self {
1807 Variant::Uuid(value)
1808 }
1809}
1810
1811impl<'v> From<&'v str> for Variant<'_, 'v> {
1812 fn from(value: &'v str) -> Self {
1813 if value.len() > MAX_SHORT_STRING_BYTES {
1814 Variant::String(value)
1815 } else {
1816 Variant::ShortString(ShortString(value))
1817 }
1818 }
1819}
1820
1821impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1822 type Error = ArrowError;
1823
1824 fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1825 Ok(Variant::Decimal4(VariantDecimal4::try_new(
1826 value.0, value.1,
1827 )?))
1828 }
1829}
1830
1831impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1832 type Error = ArrowError;
1833
1834 fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1835 Ok(Variant::Decimal8(VariantDecimal8::try_new(
1836 value.0, value.1,
1837 )?))
1838 }
1839}
1840
1841impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1842 type Error = ArrowError;
1843
1844 fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1845 Ok(Variant::Decimal16(VariantDecimal16::try_new(
1846 value.0, value.1,
1847 )?))
1848 }
1849}
1850
// Debug placeholder for entries of a VariantObject or VariantList that failed to
// decode: it prints the bare text <invalid>, without the surrounding quotes that
// the Debug output of a string would carry.
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1859
// Debug wrapper that renders a byte slice as lowercase, zero-padded, two-digit hex
// values separated by single spaces (an empty slice prints nothing).
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut bytes = self.0.iter();
        // The first byte carries no leading separator.
        if let Some(head) = bytes.next() {
            write!(f, "{:02x}", head)?;
        }
        // Every remaining byte is preceded by one space.
        for byte in bytes {
            write!(f, " {:02x}", byte)?;
        }
        Ok(())
    }
}
1874
1875impl std::fmt::Debug for Variant<'_, '_> {
1876 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1877 match self {
1878 Variant::Null => write!(f, "Null"),
1879 Variant::BooleanTrue => write!(f, "BooleanTrue"),
1880 Variant::BooleanFalse => write!(f, "BooleanFalse"),
1881 Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1882 Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1883 Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1884 Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1885 Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1886 Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1887 Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1888 Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1889 Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1890 Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1891 Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1892 Variant::TimestampNtzMicros(ts) => {
1893 f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1894 }
1895 Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1896 Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1897 Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1898 Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1899 Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1900 Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1901 Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1902 Variant::Object(obj) => {
1903 let mut map = f.debug_map();
1904 for res in obj.iter_try() {
1905 match res {
1906 Ok((k, v)) => map.entry(&k, &v),
1907 Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1908 };
1909 }
1910 map.finish()
1911 }
1912 Variant::List(arr) => {
1913 let mut list = f.debug_list();
1914 for res in arr.iter_try() {
1915 match res {
1916 Ok(v) => list.entry(&v),
1917 Err(_) => list.entry(&InvalidVariant),
1918 };
1919 }
1920 list.finish()
1921 }
1922 }
1923 }
1924}
1925
// Unit tests for variant construction, decimal/short-string conversions and the
// `Debug` output of `Variant`.
#[cfg(test)]
mod tests {

    use super::*;

    /// An empty value buffer must be rejected with
    /// `InvalidArgumentError("Received empty bytes")`.
    #[test]
    fn test_empty_variant_will_fail() {
        let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();

        let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();

        assert!(matches!(
            err,
            ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
    }

    /// `ShortString::try_new` accepts a short input and rejects one that is a
    /// single byte over `MAX_SHORT_STRING_BYTES`.
    #[test]
    fn test_construct_short_string() {
        let short_string = ShortString::try_new("norm").expect("should fit in short string");
        assert_eq!(short_string.as_str(), "norm");

        // One ASCII byte past the limit
        let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
        let res = ShortString::try_new(&long_string);
        assert!(res.is_err());
    }

    /// Each decimal width round-trips through `Variant::from` and the matching
    /// `as_decimalN` accessor.
    #[test]
    fn test_variant_decimal_conversion() {
        let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
        let variant = Variant::from(decimal4);
        assert_eq!(variant.as_decimal4(), Some(decimal4));

        let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
        let variant = Variant::from(decimal8);
        assert_eq!(variant.as_decimal8(), Some(decimal8));

        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
        let variant = Variant::from(decimal16);
        assert_eq!(variant.as_decimal16(), Some(decimal16));
    }

    /// Builds one object holding a value of every variant subtype (including a
    /// nested object and a nested list) and checks both the compact (`{:?}`) and
    /// the alternate (`{:#?}`) Debug output against exact expected strings.
    #[test]
    fn test_variant_all_subtypes_debug() {
        use crate::VariantBuilder;

        let mut builder = VariantBuilder::new();

        // Create a root object that contains one of every variant subtype
        let mut root_obj = builder.new_object();

        // Add primitive types
        root_obj.insert("null", ());
        root_obj.insert("boolean_true", true);
        root_obj.insert("boolean_false", false);
        root_obj.insert("int8", 42i8);
        root_obj.insert("int16", 1234i16);
        root_obj.insert("int32", 123456i32);
        root_obj.insert("int64", 1234567890123456789i64);
        root_obj.insert("float", 1.234f32);
        root_obj.insert("double", 1.23456789f64);

        // Add date and timestamp types
        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
        root_obj.insert("date", date);

        // The timestamp variants are constructed explicitly rather than through
        // `From`, so each test value lands in the intended variant.
        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap()
            .and_utc();
        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));

        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_micros",
            Variant::TimestampNtzMicros(timestamp_ntz),
        );

        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap()
            .and_utc();
        root_obj.insert(
            "timestamp_nanos",
            Variant::TimestampNanos(timestamp_nanos_utc),
        );

        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_nanos",
            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
        );

        // Add decimal types
        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
        root_obj.insert("decimal4", decimal4);

        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
        root_obj.insert("decimal8", decimal8);

        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
        root_obj.insert("decimal16", decimal16);

        // Add binary and string types
        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
        root_obj.insert("binary", binary_data.as_slice());

        let long_string =
            "This is a long string that exceeds the short string limit and contains emoji 🦀";
        root_obj.insert("string", long_string);
        root_obj.insert("short_string", "Short string with emoji 🎉");
        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
        root_obj.insert("time", time);

        // Add uuid
        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        root_obj.insert("uuid", Variant::Uuid(uuid));

        // Add nested object
        let mut nested_obj = root_obj.new_object("nested_object");
        nested_obj.insert("inner_key1", "inner_value1");
        nested_obj.insert("inner_key2", 999i32);
        nested_obj.finish();

        // Add list with mixed types
        let mut mixed_list = root_obj.new_list("mixed_list");
        mixed_list.append_value(1i32);
        mixed_list.append_value("two");
        mixed_list.append_value(true);
        mixed_list.append_value(4.0f32);
        mixed_list.append_value(());

        // Add nested list inside the mixed list
        let mut nested_list = mixed_list.new_list();
        nested_list.append_value("nested");
        nested_list.append_value(10i8);
        nested_list.finish();

        mixed_list.finish();

        root_obj.finish();

        let (metadata, value) = builder.finish();
        let variant = Variant::try_new(&metadata, &value).unwrap();

        // Test Debug formatter (?)
        let debug_output = format!("{:?}", variant);

        // Verify that the debug output contains all the expected types
        assert!(debug_output.contains("\"null\": Null"));
        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
        assert!(debug_output.contains("\"int8\": Int8(42)"));
        assert!(debug_output.contains("\"int16\": Int16(1234)"));
        assert!(debug_output.contains("\"int32\": Int32(123456)"));
        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
        assert!(debug_output.contains("\"float\": Float(1.234)"));
        assert!(debug_output.contains("\"double\": Double(1.23456789"));
        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
        assert!(debug_output.contains("\"decimal4\": Decimal4("));
        assert!(debug_output.contains("\"decimal8\": Decimal8("));
        assert!(debug_output.contains("\"decimal16\": Decimal16("));
        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
        assert!(debug_output.contains("\"string\": String("));
        assert!(debug_output.contains("\"short_string\": ShortString("));
        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
        assert!(debug_output.contains("\"nested_object\":"));
        assert!(debug_output.contains("\"mixed_list\":"));

        // Exact match of the compact form. Note that the expected text lists the
        // keys in lexicographic order, not in the order they were inserted above.
        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
        assert_eq!(debug_output, expected);

        // Test alternate Debug formatter (#?)
        // In the expected text, `Binary`, `Null` and the boolean variants stay on a
        // single line while every other value is expanded across lines.
        let alt_debug_output = format!("{:#?}", variant);
        let expected = r#"{
    "binary": Binary(01 02 03 04 de ad be ef),
    "boolean_false": BooleanFalse,
    "boolean_true": BooleanTrue,
    "date": Date(
        2024-12-25,
    ),
    "decimal16": Decimal16(
        VariantDecimal16 {
            integer: 123456789012345678901234567890,
            scale: 4,
        },
    ),
    "decimal4": Decimal4(
        VariantDecimal4 {
            integer: 1234,
            scale: 2,
        },
    ),
    "decimal8": Decimal8(
        VariantDecimal8 {
            integer: 123456789,
            scale: 3,
        },
    ),
    "double": Double(
        1.23456789,
    ),
    "float": Float(
        1.234,
    ),
    "int16": Int16(
        1234,
    ),
    "int32": Int32(
        123456,
    ),
    "int64": Int64(
        1234567890123456789,
    ),
    "int8": Int8(
        42,
    ),
    "mixed_list": [
        Int32(
            1,
        ),
        ShortString(
            ShortString(
                "two",
            ),
        ),
        BooleanTrue,
        Float(
            4.0,
        ),
        Null,
        [
            ShortString(
                ShortString(
                    "nested",
                ),
            ),
            Int8(
                10,
            ),
        ],
    ],
    "nested_object": {
        "inner_key1": ShortString(
            ShortString(
                "inner_value1",
            ),
        ),
        "inner_key2": Int32(
            999,
        ),
    },
    "null": Null,
    "short_string": ShortString(
        ShortString(
            "Short string with emoji 🎉",
        ),
    ),
    "string": String(
        "This is a long string that exceeds the short string limit and contains emoji 🦀",
    ),
    "time": Time(
        01:02:03.000004,
    ),
    "timestamp_micros": TimestampMicros(
        2024-12-25T15:30:45.123Z,
    ),
    "timestamp_nanos": TimestampNanos(
        2025-08-15T12:03:04.123456789Z,
    ),
    "timestamp_ntz_micros": TimestampNtzMicros(
        2024-12-25T15:30:45.123,
    ),
    "timestamp_ntz_nanos": TimestampNtzNanos(
        2025-08-15T12:03:04.123456789,
    ),
    "uuid": Uuid(
        67e55044-10b1-426f-9247-bb680e5fe0c8,
    ),
}"#;
        assert_eq!(alt_debug_output, expected);
    }
}