Skip to main content

parquet_variant/
variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType};
19pub use self::list::VariantList;
20pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
21pub use self::object::VariantObject;
22
// Publicly export types used in the API
24pub use half::f16;
25pub use uuid::Uuid;
26
27use crate::decoder::{
28    self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
29};
30use crate::path::{VariantPath, VariantPathElement};
31use crate::utils::{first_byte_from_slice, slice_from_slice};
32use arrow::array::ArrowNativeTypeOp;
33use arrow::compute::{
34    DecimalCast, cast_num_to_bool, cast_single_string_to_boolean_default, num_cast,
35    parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy,
36    single_float_to_decimal,
37};
38use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType};
39
40use arrow_schema::ArrowError;
41use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
42use num_traits::NumCast;
43use std::ops::Deref;
44
45mod decimal;
46mod list;
47mod metadata;
48mod object;
49
/// Maximum length, in bytes, of a Variant short string (`0x3F` = 63).
const MAX_SHORT_STRING_BYTES: usize = 0x3F;
51
/// A Variant [`ShortString`]
///
/// This implementation is a zero-cost wrapper over `&str` whose public
/// constructor, [`ShortString::try_new`], only accepts strings that fit in a
/// Variant short string (63 bytes or fewer).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ShortString<'a>(pub(crate) &'a str);
58
59impl<'a> ShortString<'a> {
60    /// Attempts to interpret `value` as a variant short string value.
61    ///
62    /// # Errors
63    ///
64    /// Returns an error if  `value` is longer than the maximum allowed length
65    /// of a Variant short string (63 bytes).
66    pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
67        if value.len() > MAX_SHORT_STRING_BYTES {
68            return Err(ArrowError::InvalidArgumentError(format!(
69                "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
70            )));
71        }
72
73        Ok(Self(value))
74    }
75
76    /// Returns the underlying Variant short string as a &str
77    pub fn as_str(&self) -> &'a str {
78        self.0
79    }
80}
81
82impl<'a> From<ShortString<'a>> for &'a str {
83    fn from(value: ShortString<'a>) -> Self {
84        value.0
85    }
86}
87
88impl<'a> TryFrom<&'a str> for ShortString<'a> {
89    type Error = ArrowError;
90
91    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
92        Self::try_new(value)
93    }
94}
95
96impl AsRef<str> for ShortString<'_> {
97    fn as_ref(&self) -> &str {
98        self.0
99    }
100}
101
102impl Deref for ShortString<'_> {
103    type Target = str;
104
105    fn deref(&self) -> &Self::Target {
106        self.0
107    }
108}
109
110/// Represents a [Parquet Variant]
111///
112/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
113///
114/// # Background
115///
116/// The [specification] says:
117///
118/// The Variant Binary Encoding allows representation of semi-structured data
119/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
120/// intended to allow efficient access to nested data even in the presence of
121/// very wide or deep structures.
122///
123/// Another motivation for the representation is that (aside from metadata) each
124/// nested Variant value is contiguous and self-contained. For example, in a
125/// Variant containing an Array of Variant values, the representation of an
126/// inner Variant value, when paired with the metadata of the full variant, is
127/// itself a valid Variant.
128///
129/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
130/// refers to extracting some elements of the variant into separate columns for
131/// more efficient extraction/filter pushdown. The [Variant Shredding
132/// specification] describes the details of shredding Variant values as typed
133/// Parquet columns.
134///
135/// A Variant represents a type that contains one of:
136///
137/// * Primitive: A type and corresponding value (e.g. INT, STRING)
138///
139/// * Array: An ordered list of Variant values
140///
141/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
142///   pairs). An object may not contain duplicate keys.
143///
144/// # Encoding
145///
146/// A Variant is encoded with 2 binary values, the value and the metadata. The
147/// metadata stores a header and an optional dictionary of field names which are
148/// referred to by offset in the value. The value is a binary representation of
149/// the actual data, and varies depending on the type.
150///
151/// # Design Goals
152///
153/// The design goals of the Rust API are as follows:
154/// 1. Speed / Zero copy access (no `clone`ing is required)
155/// 2. Safety
156/// 3. Follow standard Rust conventions
157///
158/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
159/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
160/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
161///
162/// # Casting Semantics
163///
164/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable.
165/// Conversions in this module delegate to Arrow compute cast helpers such as
166/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and
167/// [`cast_single_string_to_boolean_default`].
168///
169/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants.
170///   Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows
171///   Arrow UTF8-to-boolean cast rules.
172/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`],
173///   [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept
174///   boolean and numeric variants (integers, floating-point, and decimals).
175///   They return `None` when conversion is not possible.
176/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and
177///   [`Self::as_decimal16`] accept compatible decimal variants, integer variants,
178///   float variants and string variants.
179///   They return `None` when conversion is not possible.
180///
181/// # Examples:
182///
183/// ## Creating `Variant` from Rust Types
184/// ```
185/// use parquet_variant::Variant;
186/// // variants can be directly constructed
187/// let variant = Variant::Int32(123);
188/// // or constructed via `From` impls
189/// assert_eq!(variant, Variant::from(123i32));
190/// ```
191/// ## Creating `Variant` from metadata and value
192/// ```
193/// # use parquet_variant::{Variant, VariantMetadata};
194/// let metadata = [0x01, 0x00, 0x00];
195/// let value = [0x09, 0x48, 0x49];
196/// // parse the header metadata
197/// assert_eq!(
198///   Variant::from("HI"),
199///   Variant::new(&metadata, &value)
200/// );
201/// ```
202///
203/// ## Using `Variant` values
204/// ```
205/// # use parquet_variant::Variant;
206/// # let variant = Variant::Int32(123);
207/// // variants can be used in match statements like normal enums
208/// match variant {
209///   Variant::Int32(i) => println!("Integer: {}", i),
210///   Variant::String(s) => println!("String: {}", s),
211///   _ => println!("Other variant"),
212/// }
213/// ```
214///
215/// # Validation
216///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant value (see below).
219///
220/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
221/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
222/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
223/// `v` are the number of bytes in the metadata and value buffers, respectively.
224///
225/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
226/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
227/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
228/// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
229/// used to _validate_ an _unvalidated_ instance, if desired.
230///
231/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
232/// knows the underlying bytes were already validated previously, or if the caller intends to
233/// perform a small number of (fallible) accesses to a large variant value.
234///
235/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
236/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
237///
238/// # Safety
239///
240/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
241/// infallible methods may cause panics but will never lead to undefined behavior.
242///
243/// [metadata]: VariantMetadata#Validation
244/// [object]: VariantObject#Validation
245/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive (type_id=1): NULL
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: `Binary` and `String` borrow from the *value* buffer only (lifetime `'v`);
    // they never reference the metadata buffer.
    /// Primitive (type_id=1): BINARY
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING
    ///
    /// Like [`Variant::String`], this borrows from the value buffer only.
    ShortString(ShortString<'v>),
    // Objects and lists need both the metadata (`'m`) and the value (`'v`) buffers.
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
299
// We don't want this to grow because it could hurt performance of a frequently-created type.
// Each assertion below pins the expected size of `Variant` for one family of targets; exactly
// one of the three `cfg`s applies to any given 32- or 64-bit build.
// NOTE(review): `expect_size_of` is presumably a compile-time check taking a size in bytes —
// confirm in `crate::utils`.
// 64-bit s390x targets.
#[cfg(all(target_pointer_width = "64", target_arch = "s390x"))]
const _: () = crate::utils::expect_size_of::<Variant>(72);
// All other 64-bit targets.
#[cfg(all(target_pointer_width = "64", not(target_arch = "s390x")))]
const _: () = crate::utils::expect_size_of::<Variant>(80);
// 32-bit targets.
#[cfg(target_pointer_width = "32")]
const _: () = crate::utils::expect_size_of::<Variant>(48);
307
/// Classifies a numeric cast target as either an integer or a floating-point type.
enum NumericKind {
    /// Integer targets (`i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`).
    Integer,
    /// Floating-point targets (`f16`, `f32`, `f64`).
    Float,
}
312
/// A numeric type tagged with the [`NumericKind`] it belongs to.
///
/// NOTE(review): presumably consulted by the decimal-to-number cast helpers to choose
/// between an integer and a float conversion path — confirm against the callers.
trait DecimalCastTarget: NumCast + Default {
    /// Whether the implementing type is an integer or a floating-point type.
    const KIND: NumericKind;
}
316
// Implements `DecimalCastTarget` for `$raw_type`, setting its `KIND` constant to
// `$target_kind`.
macro_rules! impl_decimal_cast_target {
    ($raw_type: ident, $target_kind:expr) => {
        impl DecimalCastTarget for $raw_type {
            const KIND: NumericKind = $target_kind;
        }
    };
}

// Signed and unsigned integer targets.
impl_decimal_cast_target!(i8, NumericKind::Integer);
impl_decimal_cast_target!(i16, NumericKind::Integer);
impl_decimal_cast_target!(i32, NumericKind::Integer);
impl_decimal_cast_target!(i64, NumericKind::Integer);
impl_decimal_cast_target!(u8, NumericKind::Integer);
impl_decimal_cast_target!(u16, NumericKind::Integer);
impl_decimal_cast_target!(u32, NumericKind::Integer);
impl_decimal_cast_target!(u64, NumericKind::Integer);
// Floating-point targets (`f16` is the `half::f16` type re-exported by this module).
impl_decimal_cast_target!(f16, NumericKind::Float);
impl_decimal_cast_target!(f32, NumericKind::Float);
impl_decimal_cast_target!(f64, NumericKind::Float);
336
337impl<'m, 'v> Variant<'m, 'v> {
338    /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
339    ///
340    /// The instance is fully [validated].
341    ///
342    /// # Example
343    /// ```
344    /// use parquet_variant::{Variant, VariantMetadata};
345    /// let metadata = [0x01, 0x00, 0x00];
346    /// let value = [0x09, 0x48, 0x49];
347    /// // parse the header metadata
348    /// assert_eq!(
349    ///   Variant::from("HI"),
350    ///   Variant::try_new(&metadata, &value).unwrap()
351    /// );
352    /// ```
353    ///
354    /// [validated]: Self#Validation
355    pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
356        let metadata = VariantMetadata::try_new(metadata)?;
357        Self::try_new_with_metadata(metadata, value)
358    }
359
    /// Interprets a metadata and value buffer pair as a new `Variant`.
    ///
    /// The instance is [unvalidated].
    ///
    /// # Panics
    ///
    /// Panics if the metadata or the value fails shallow validation.
363    ///
364    /// # Example
365    /// ```
366    /// use parquet_variant::{Variant, VariantMetadata};
367    /// let metadata = [0x01, 0x00, 0x00];
368    /// let value = [0x09, 0x48, 0x49];
369    /// // parse the header metadata
370    /// assert_eq!(
371    ///   Variant::from("HI"),
372    ///   Variant::new(&metadata, &value)
373    /// );
374    /// ```
375    ///
376    /// [unvalidated]: Self#Validation
377    pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
378        let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
379            .expect("Invalid variant metadata");
380        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
381            .expect("Invalid variant data")
382    }
383
384    /// Create a new variant with existing metadata.
385    ///
386    /// The instance is fully [validated].
387    ///
388    /// # Example
389    /// ```
390    /// # use parquet_variant::{Variant, VariantMetadata};
391    /// let metadata = [0x01, 0x00, 0x00];
392    /// let value = [0x09, 0x48, 0x49];
393    /// // parse the header metadata first
394    /// let metadata = VariantMetadata::new(&metadata);
395    /// assert_eq!(
396    ///   Variant::from("HI"),
397    ///   Variant::try_new_with_metadata(metadata, &value).unwrap()
398    /// );
399    /// ```
400    ///
401    /// [validated]: Self#Validation
402    pub fn try_new_with_metadata(
403        metadata: VariantMetadata<'m>,
404        value: &'v [u8],
405    ) -> Result<Self, ArrowError> {
406        Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
407    }
408
409    /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
410    ///
411    /// [unvalidated]: Self#Validation
412    pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
413        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
414            .expect("Invalid variant")
415    }
416
417    // The actual constructor, which only performs shallow (constant-time) validation.
418    fn try_new_with_metadata_and_shallow_validation(
419        metadata: VariantMetadata<'m>,
420        value: &'v [u8],
421    ) -> Result<Self, ArrowError> {
422        let value_metadata = first_byte_from_slice(value)?;
423        let value_data = slice_from_slice(value, 1..)?;
424        let new_self = match get_basic_type(value_metadata) {
425            VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
426                VariantPrimitiveType::Null => Variant::Null,
427                VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
428                VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
429                VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
430                VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
431                VariantPrimitiveType::Decimal4 => {
432                    let (integer, scale) = decoder::decode_decimal4(value_data)?;
433                    Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
434                }
435                VariantPrimitiveType::Decimal8 => {
436                    let (integer, scale) = decoder::decode_decimal8(value_data)?;
437                    Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
438                }
439                VariantPrimitiveType::Decimal16 => {
440                    let (integer, scale) = decoder::decode_decimal16(value_data)?;
441                    Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
442                }
443                VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
444                VariantPrimitiveType::Double => {
445                    Variant::Double(decoder::decode_double(value_data)?)
446                }
447                VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
448                VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
449                VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
450                VariantPrimitiveType::TimestampMicros => {
451                    Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
452                }
453                VariantPrimitiveType::TimestampNtzMicros => {
454                    Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
455                }
456                VariantPrimitiveType::TimestampNanos => {
457                    Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
458                }
459                VariantPrimitiveType::TimestampNtzNanos => {
460                    Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
461                }
462                VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
463                VariantPrimitiveType::Binary => {
464                    Variant::Binary(decoder::decode_binary(value_data)?)
465                }
466                VariantPrimitiveType::String => {
467                    Variant::String(decoder::decode_long_string(value_data)?)
468                }
469                VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
470            },
471            VariantBasicType::ShortString => {
472                Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
473            }
474            VariantBasicType::Object => Variant::Object(
475                VariantObject::try_new_with_shallow_validation(metadata, value)?,
476            ),
477            VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
478                metadata, value,
479            )?),
480        };
481        Ok(new_self)
482    }
483
484    /// True if this variant instance has already been [validated].
485    ///
486    /// [validated]: Self#Validation
487    pub fn is_fully_validated(&self) -> bool {
488        match self {
489            Variant::List(list) => list.is_fully_validated(),
490            Variant::Object(obj) => obj.is_fully_validated(),
491            _ => true,
492        }
493    }
494
495    /// Recursively validates this variant value, ensuring that infallible access will not panic due
496    /// to invalid bytes.
497    ///
498    /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
499    /// constructed in unvalidated (and potentially invalid) state.
500    ///
501    /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
502    /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
503    ///
504    /// [objects]: VariantObject#Validation
505    /// [arrays]: VariantList#Validation
506    pub fn with_full_validation(self) -> Result<Self, ArrowError> {
507        use Variant::*;
508        match self {
509            List(list) => list.with_full_validation().map(List),
510            Object(obj) => obj.with_full_validation().map(Object),
511            _ => Ok(self),
512        }
513    }
514
515    /// Converts this variant to `()` if it is null.
516    ///
517    /// Returns `Some(())` for null variants,
518    /// `None` for non-null variants.
519    ///
520    /// # Examples
521    ///
522    /// ```
523    /// use parquet_variant::Variant;
524    ///
525    /// // you can extract `()` from a null variant
526    /// let v1 = Variant::from(());
527    /// assert_eq!(v1.as_null(), Some(()));
528    ///
529    /// // but not from other variants
530    /// let v2 = Variant::from("hello!");
531    /// assert_eq!(v2.as_null(), None);
532    /// ```
533    pub fn as_null(&self) -> Option<()> {
534        matches!(self, Variant::Null).then_some(())
535    }
536
537    /// Converts this variant to a `bool` if possible.
538    ///
    /// Returns `Some(bool)` for boolean, integer and floating-point variants, and for
    /// string variants that can be parsed as a boolean; `None` for all other variants.
541    ///
542    /// # Examples
543    ///
544    /// ```
545    /// use parquet_variant::Variant;
546    ///
547    /// // you can extract a bool from the true variant
548    /// let v1 = Variant::from(true);
549    /// assert_eq!(v1.as_boolean(), Some(true));
550    ///
551    /// // and the false variant
552    /// let v2 = Variant::from(false);
553    /// assert_eq!(v2.as_boolean(), Some(false));
554    ///
555    /// // and a numeric variant
556    /// let v3 = Variant::from(3);
557    /// assert_eq!(v3.as_boolean(), Some(true));
558    ///
559    /// // and a string variant
560    /// let v4 = Variant::from("true");
561    /// assert_eq!(v4.as_boolean(), Some(true));
562    ///
563    /// // but not from other variants
564    /// let v5 = Variant::from("hello!");
565    /// assert_eq!(v5.as_boolean(), None);
566    /// ```
567    pub fn as_boolean(&self) -> Option<bool> {
568        match self {
569            Variant::BooleanTrue => Some(true),
570            Variant::BooleanFalse => Some(false),
571            Variant::Int8(i) => Some(cast_num_to_bool(*i)),
572            Variant::Int16(i) => Some(cast_num_to_bool(*i)),
573            Variant::Int32(i) => Some(cast_num_to_bool(*i)),
574            Variant::Int64(i) => Some(cast_num_to_bool(*i)),
575            Variant::Float(f) => Some(cast_num_to_bool(*f)),
576            Variant::Double(d) => Some(cast_num_to_bool(*d)),
577            Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()),
578            Variant::String(s) => cast_single_string_to_boolean_default(s),
579            _ => None,
580        }
581    }
582
583    /// Converts this variant to a `NaiveDate` if possible.
584    ///
585    /// Returns `Some(NaiveDate)` for date variants,
586    /// `None` for non-date variants.
587    ///
588    /// # Examples
589    ///
590    /// ```
591    /// use parquet_variant::Variant;
592    /// use chrono::NaiveDate;
593    ///
594    /// // you can extract a NaiveDate from a date variant
595    /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
596    /// let v1 = Variant::from(date);
597    /// assert_eq!(v1.as_naive_date(), Some(date));
598    ///
599    /// // but not from other variants
600    /// let v2 = Variant::from("hello!");
601    /// assert_eq!(v2.as_naive_date(), None);
602    /// ```
603    pub fn as_naive_date(&self) -> Option<NaiveDate> {
604        if let Variant::Date(d) = self {
605            Some(*d)
606        } else {
607            None
608        }
609    }
610
611    /// Converts this variant to a `DateTime<Utc>` if possible.
612    ///
613    /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
614    /// `None` for other variants.
615    ///
616    /// # Examples
617    ///
618    /// ```
619    /// use parquet_variant::Variant;
620    /// use chrono::NaiveDate;
621    ///
622    /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
623    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
624    ///     .unwrap()
625    ///     .and_hms_milli_opt(12, 34, 56, 780)
626    ///     .unwrap()
627    ///     .and_utc();
628    /// let v1 = Variant::from(datetime);
629    /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
630    ///
631    /// // but not for other variants.
632    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
633    ///     .unwrap()
634    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
635    ///     .unwrap()
636    ///     .and_utc();
637    /// let v2 = Variant::from(datetime_nanos);
638    /// assert_eq!(v2.as_timestamp_micros(), None);
639    /// ```
640    pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
641        match *self {
642            Variant::TimestampMicros(d) => Some(d),
643            _ => None,
644        }
645    }
646
647    /// Converts this variant to a `NaiveDateTime` if possible.
648    ///
649    /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
650    /// `None` for other variants.
651    ///
652    /// # Examples
653    ///
654    /// ```
655    /// use parquet_variant::Variant;
656    /// use chrono::NaiveDate;
657    ///
658    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
659    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
660    ///     .unwrap()
661    ///     .and_hms_milli_opt(12, 34, 56, 780)
662    ///     .unwrap();
663    /// let v1 = Variant::from(datetime);
664    /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
665    ///
666    /// // but not for other variants.
667    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
668    ///     .unwrap()
669    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
670    ///     .unwrap();
671    /// let v2 = Variant::from(datetime_nanos);
    /// assert_eq!(v2.as_timestamp_ntz_micros(), None);
673    /// ```
674    pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
675        match *self {
676            Variant::TimestampNtzMicros(d) => Some(d),
677            _ => None,
678        }
679    }
680
681    /// Converts this variant to a `DateTime<Utc>` if possible.
682    ///
683    /// Returns `Some(DateTime<Utc>)` for timestamp variants,
684    /// `None` for other variants.
685    ///
686    /// # Examples
687    ///
688    /// ```
689    /// use parquet_variant::Variant;
690    /// use chrono::NaiveDate;
691    ///
692    /// // you can extract a DateTime<Utc> from a UTC-adjusted nanosecond-precision variant
693    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
694    ///     .unwrap()
695    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
696    ///     .unwrap()
697    ///     .and_utc();
698    /// let v1 = Variant::from(datetime);
699    /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
700    ///
701    /// // or from UTC-adjusted microsecond-precision variant
702    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
703    ///     .unwrap()
704    ///     .and_hms_milli_opt(12, 33, 54, 123)
705    ///     .unwrap()
706    ///     .and_utc();
707    /// // this will convert to `Variant::TimestampMicros`.
708    /// let v2 = Variant::from(datetime_micros);
709    /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
710    ///
711    /// // but not for other variants.
712    /// let v3 = Variant::from("hello!");
713    /// assert_eq!(v3.as_timestamp_nanos(), None);
714    /// ```
715    pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
716        match *self {
717            Variant::TimestampNanos(d) | Variant::TimestampMicros(d) => Some(d),
718            _ => None,
719        }
720    }
721
722    /// Converts this variant to a `NaiveDateTime` if possible.
723    ///
724    /// Returns `Some(NaiveDateTime)` for timestamp variants,
725    /// `None` for other variants.
726    ///
727    /// # Examples
728    ///
729    /// ```
730    /// use parquet_variant::Variant;
731    /// use chrono::NaiveDate;
732    ///
733    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
734    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
735    ///     .unwrap()
736    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
737    ///     .unwrap();
738    /// let v1 = Variant::from(datetime);
739    /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
740    ///
741    /// // or from a microsecond-precision non-UTC-adjusted variant
742    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
743    ///     .unwrap()
744    ///     .and_hms_milli_opt(12, 33, 54, 123)
745    ///     .unwrap();
    /// // this will convert to `Variant::TimestampNtzMicros`.
747    /// let v2 = Variant::from(datetime_micros);
748    /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
749    ///
750    /// // but not for other variants.
751    /// let v3 = Variant::from("hello!");
752    /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
753    /// ```
754    pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
755        match *self {
756            Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) => Some(d),
757            _ => None,
758        }
759    }
760
761    /// Converts this variant to a `&[u8]` if possible.
762    ///
763    /// Returns `Some(&[u8])` for binary variants,
764    /// `None` for non-binary variants.
765    ///
766    /// # Examples
767    ///
768    /// ```
769    /// use parquet_variant::Variant;
770    ///
771    /// // you can extract a byte slice from a binary variant
772    /// let data = b"hello!";
773    /// let v1 = Variant::Binary(data);
774    /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
775    ///
776    /// // but not from other variant types
777    /// let v2 = Variant::from(123i64);
778    /// assert_eq!(v2.as_u8_slice(), None);
779    /// ```
780    pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
781        if let Variant::Binary(d) = self {
782            Some(d)
783        } else {
784            None
785        }
786    }
787
788    /// Converts this variant to a `&str` if possible.
789    ///
790    /// Returns `Some(&str)` for string variants (both regular and short strings),
791    /// `None` for non-string variants.
792    ///
793    /// # Examples
794    ///
795    /// ```
796    /// use parquet_variant::Variant;
797    ///
798    /// // you can extract a string from string variants
799    /// let s = "hello!";
800    /// let v1 = Variant::from(s);
801    /// assert_eq!(v1.as_string(), Some(s));
802    ///
803    /// // but not from other variants
804    /// let v2 = Variant::from(123i64);
805    /// assert_eq!(v2.as_string(), None);
806    /// ```
807    pub fn as_string(&'v self) -> Option<&'v str> {
808        match self {
809            Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
810            _ => None,
811        }
812    }
813
814    /// Converts this variant to a `uuid hyphenated string` if possible.
815    ///
816    /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
817    ///
818    /// # Examples
819    ///
820    /// ```
821    /// use parquet_variant::Variant;
822    ///
823    /// // You can extract a UUID from a UUID variant
824    /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
825    /// let v1 = Variant::Uuid(s);
826    /// assert_eq!(s, v1.as_uuid().unwrap());
827    /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
828    ///
829    /// //but not from other variants
830    /// let v2 = Variant::from(1234);
831    /// assert_eq!(None, v2.as_uuid())
832    /// ```
833    pub fn as_uuid(&self) -> Option<Uuid> {
834        match self {
835            Variant::Uuid(u) => Some(*u),
836            _ => None,
837        }
838    }
839
840    fn cast_decimal_to_num<D, T, F>(raw: D::Native, scale: u8, as_float: F) -> Option<T>
841    where
842        D: DecimalType,
843        D::Native: NumCast + ArrowNativeTypeOp,
844        T: DecimalCastTarget,
845        F: Fn(D::Native) -> f64,
846    {
847        let base: D::Native = NumCast::from(10)?;
848
849        let div = base.pow_checked(<u32 as From<u8>>::from(scale)).ok()?;
850        match T::KIND {
851            NumericKind::Integer => raw
852                .div_checked(div)
853                .ok()
854                .and_then(<T as NumCast>::from::<D::Native>),
855            NumericKind::Float => T::from(single_decimal_to_float_lossy::<D, _>(
856                &as_float,
857                raw,
858                <i32 as From<u8>>::from(scale),
859            )),
860        }
861    }
862
863    /// Converts a boolean or numeric variant(integers, floating-point, and decimals)
864    /// to the specified numeric type `T`.
865    ///
866    /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if
867    /// the conversion succeeds, `None` if the variant can't be casted to type `T`.
868    fn as_num<T>(&self) -> Option<T>
869    where
870        T: DecimalCastTarget,
871    {
872        match *self {
873            Variant::BooleanFalse => single_bool_to_numeric(false),
874            Variant::BooleanTrue => single_bool_to_numeric(true),
875            Variant::Int8(i) => num_cast(i),
876            Variant::Int16(i) => num_cast(i),
877            Variant::Int32(i) => num_cast(i),
878            Variant::Int64(i) => num_cast(i),
879            Variant::Float(f) => num_cast(f),
880            Variant::Double(d) => num_cast(d),
881            Variant::Decimal4(d) => {
882                Self::cast_decimal_to_num::<Decimal32Type, T, _>(d.integer(), d.scale(), |x| {
883                    x as f64
884                })
885            }
886            Variant::Decimal8(d) => {
887                Self::cast_decimal_to_num::<Decimal64Type, T, _>(d.integer(), d.scale(), |x| {
888                    x as f64
889                })
890            }
891            Variant::Decimal16(d) => {
892                Self::cast_decimal_to_num::<Decimal128Type, T, _>(d.integer(), d.scale(), |x| {
893                    x as f64
894                })
895            }
896            _ => None,
897        }
898    }
899
900    /// Converts this variant to an `i8` if possible.
901    ///
902    /// Returns `Some(i8)` for boolean and numeric variants(integers, floating-point,
903    /// and decimals with scale 0) that fit in `i8` range,
904    /// `None` for other variants or values that would overflow.
905    ///
906    /// # Examples
907    ///
908    /// ```
909    /// use parquet_variant::Variant;
910    ///
911    /// // you can read an int64 variant into an i8 if it fits
912    /// let v1 = Variant::from(123i64);
913    /// assert_eq!(v1.as_int8(), Some(123i8));
914    ///
915    /// // or from boolean variant
916    /// let v2 = Variant::BooleanFalse;
917    /// assert_eq!(v2.as_int8(), Some(0));
918    ///
919    /// // but not if it would overflow
920    /// let v3 = Variant::from(1234i64);
921    /// assert_eq!(v3.as_int8(), None);
922    ///
923    /// // or if the variant cannot be cast into an integer
924    /// let v4 = Variant::from("hello!");
925    /// assert_eq!(v4.as_int8(), None);
926    /// ```
927    pub fn as_int8(&self) -> Option<i8> {
928        self.as_num()
929    }
930
931    /// Converts this variant to an `i16` if possible.
932    ///
933    /// Returns `Some(i16)` for boolean and numeric variants(integers, floating-point,
934    /// and decimals with scale 0) that fit in `i16` range
935    /// `None` for other variants or values that would overflow.
936    ///
937    /// # Examples
938    ///
939    /// ```
940    /// use parquet_variant::Variant;
941    ///
942    /// // you can read an int64 variant into an i16 if it fits
943    /// let v1 = Variant::from(123i64);
944    /// assert_eq!(v1.as_int16(), Some(123i16));
945    ///
946    /// // or from boolean variant
947    /// let v2 = Variant::BooleanFalse;
948    /// assert_eq!(v2.as_int16(), Some(0));
949    ///
950    /// // but not if it would overflow
951    /// let v3 = Variant::from(123456i64);
952    /// assert_eq!(v3.as_int16(), None);
953    ///
954    /// // or if the variant cannot be cast into an integer
955    /// let v4 = Variant::from("hello!");
956    /// assert_eq!(v4.as_int16(), None);
957    /// ```
958    pub fn as_int16(&self) -> Option<i16> {
959        self.as_num()
960    }
961
962    /// Converts this variant to an `i32` if possible.
963    ///
964    /// Returns `Some(i32)` for boolean and numeric variants(integers, floating-point,
965    /// and decimals with scale 0) that fit in `i32` range
966    /// `None` for other variants or values that would overflow.
967    ///
968    /// # Examples
969    ///
970    /// ```
971    /// use parquet_variant::Variant;
972    ///
973    /// // you can read an int64 variant into an i32 if it fits
974    /// let v1 = Variant::from(123i64);
975    /// assert_eq!(v1.as_int32(), Some(123i32));
976    ///
977    /// // or from boolean variant
978    /// let v2 = Variant::BooleanFalse;
979    /// assert_eq!(v2.as_int32(), Some(0));
980    ///
981    /// // but not if it would overflow
982    /// let v3 = Variant::from(12345678901i64);
983    /// assert_eq!(v3.as_int32(), None);
984    ///
985    /// // or if the variant cannot be cast into an integer
986    /// let v4 = Variant::from("hello!");
987    /// assert_eq!(v4.as_int32(), None);
988    /// ```
989    pub fn as_int32(&self) -> Option<i32> {
990        self.as_num()
991    }
992
993    /// Converts this variant to an `i64` if possible.
994    ///
995    /// Returns `Some(i64)` for boolean and numeric variants(integers, floating-point,
996    /// and decimals with scale 0) that fit in `i64` range
997    /// `None` for other variants or values that would overflow.
998    ///
999    /// # Examples
1000    ///
1001    /// ```
1002    /// use parquet_variant::Variant;
1003    ///
1004    /// // you can read an int64 variant into an i64
1005    /// let v1 = Variant::from(123i64);
1006    /// assert_eq!(v1.as_int64(), Some(123i64));
1007    ///
1008    /// // or from boolean variant
1009    /// let v2 = Variant::BooleanFalse;
1010    /// assert_eq!(v2.as_int64(), Some(0));
1011    ///
1012    /// // but not a variant that cannot be cast into an integer
1013    /// let v3 = Variant::from("hello!");
1014    /// assert_eq!(v3.as_int64(), None);
1015    /// ```
1016    pub fn as_int64(&self) -> Option<i64> {
1017        self.as_num()
1018    }
1019
1020    /// Converts this variant to a `u8` if possible.
1021    ///
1022    /// Returns `Some(u8)` for boolean and numeric variants(integers, floating-point,
1023    /// and decimals with scale 0) that fit in `u8` range
1024    /// `None` for other variants or values that would overflow.
1025    ///
1026    /// # Examples
1027    ///
1028    /// ```
1029    ///  use parquet_variant::{Variant, VariantDecimal4};
1030    ///
1031    ///  // you can read an int64 variant into an u8
1032    ///  let v1 = Variant::from(123i64);
1033    ///  assert_eq!(v1.as_u8(), Some(123u8));
1034    ///
1035    ///  // or a Decimal4 with scale 0 into u8
1036    ///  let d = VariantDecimal4::try_new(26, 0).unwrap();
1037    ///  let v2 = Variant::from(d);
1038    ///  assert_eq!(v2.as_u8(), Some(26u8));
1039    ///
1040    ///  // or a variant that decimal with scale not equal to zero
1041    ///  let d = VariantDecimal4::try_new(123, 2).unwrap();
1042    ///  let v3 = Variant::from(d);
1043    ///  assert_eq!(v3.as_u8(), Some(1));
1044    ///
1045    /// // or from boolean variant
1046    /// let v4 = Variant::BooleanFalse;
1047    /// assert_eq!(v4.as_u8(), Some(0));
1048    ///
1049    ///  // but not a variant that can't fit into the range
1050    ///  let v5 = Variant::from(-1);
1051    ///  assert_eq!(v5.as_u8(), None);
1052    ///
1053    ///  // or not a variant that cannot be cast into an integer
1054    ///  let v6 = Variant::from("hello!");
1055    ///  assert_eq!(v6.as_u8(), None);
1056    /// ```
1057    pub fn as_u8(&self) -> Option<u8> {
1058        self.as_num()
1059    }
1060
1061    /// Converts this variant to an `u16` if possible.
1062    ///
1063    /// Returns `Some(u16)` for boolean and numeric variants(integers, floating-point,
1064    /// and decimals with scale 0) that fit in `u16` range
1065    /// `None` for other variants or values that would overflow.
1066    ///
1067    /// # Examples
1068    ///
1069    /// ```
1070    ///  use parquet_variant::{Variant, VariantDecimal4};
1071    ///
1072    ///  // you can read an int64 variant into an u16
1073    ///  let v1 = Variant::from(123i64);
1074    ///  assert_eq!(v1.as_u16(), Some(123u16));
1075    ///
1076    ///  // or a Decimal4 with scale 0 into u8
1077    ///  let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
1078    ///  let v2 = Variant::from(d);
1079    ///  assert_eq!(v2.as_u16(), Some(u16::MAX));
1080    ///
1081    ///  // or a variant that decimal with scale not equal to zero
1082    ///  let d = VariantDecimal4::try_new(123, 2).unwrap();
1083    ///  let v3 = Variant::from(d);
1084    ///  assert_eq!(v3.as_u16(), Some(1));
1085    ///
1086    /// // or from boolean variant
1087    /// let v4= Variant::BooleanFalse;
1088    /// assert_eq!(v4.as_u16(), Some(0));
1089    ///
1090    ///  // but not a variant that can't fit into the range
1091    ///  let v5 = Variant::from(-1);
1092    ///  assert_eq!(v5.as_u16(), None);
1093    ///
1094    ///  // or not a variant that cannot be cast into an integer
1095    ///  let v6 = Variant::from("hello!");
1096    ///  assert_eq!(v6.as_u16(), None);
1097    /// ```
1098    pub fn as_u16(&self) -> Option<u16> {
1099        self.as_num()
1100    }
1101
1102    /// Converts this variant to an `u32` if possible.
1103    ///
1104    /// Returns `Some(u32)` for boolean and numeric variants(integers, floating-point,
1105    /// and decimals with scale 0) that fit in `u32` range
1106    /// `None` for other variants or values that would overflow.
1107    ///
1108    /// # Examples
1109    ///
1110    /// ```
1111    ///  use parquet_variant::{Variant, VariantDecimal8};
1112    ///
1113    ///  // you can read an int64 variant into an u32
1114    ///  let v1 = Variant::from(123i64);
1115    ///  assert_eq!(v1.as_u32(), Some(123u32));
1116    ///
1117    ///  // or a Decimal4 with scale 0 into u8
1118    ///  let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
1119    ///  let v2 = Variant::from(d);
1120    ///  assert_eq!(v2.as_u32(), Some(u32::MAX));
1121    ///
1122    ///  // or a variant that decimal with scale not equal to zero
1123    ///  let d = VariantDecimal8::try_new(123, 2).unwrap();
1124    ///  let v3 = Variant::from(d);
1125    ///  assert_eq!(v3.as_u32(), Some(1));
1126    ///
1127    /// // or from boolean variant
1128    /// let v4 = Variant::BooleanFalse;
1129    /// assert_eq!(v4.as_u32(), Some(0));
1130    ///
1131    ///  // but not a variant that can't fit into the range
1132    ///  let v5 = Variant::from(-1);
1133    ///  assert_eq!(v5.as_u32(), None);
1134    ///
1135    ///  // or not a variant that cannot be cast into an integer
1136    ///  let v6 = Variant::from("hello!");
1137    ///  assert_eq!(v6.as_u32(), None);
1138    /// ```
1139    pub fn as_u32(&self) -> Option<u32> {
1140        self.as_num()
1141    }
1142
1143    /// Converts this variant to an `u64` if possible.
1144    ///
1145    /// Returns `Some(u64)` for boolean and numeric variants(integers, floating-point,
1146    /// and decimals with scale 0) that fit in `u64` range
1147    /// `None` for other variants or values that would overflow.
1148    ///
1149    /// # Examples
1150    ///
1151    /// ```
1152    ///  use parquet_variant::{Variant, VariantDecimal16};
1153    ///
1154    ///  // you can read an int64 variant into an u64
1155    ///  let v1 = Variant::from(123i64);
1156    ///  assert_eq!(v1.as_u64(), Some(123u64));
1157    ///
1158    ///  // or a Decimal16 with scale 0 into u8
1159    ///  let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
1160    ///  let v2 = Variant::from(d);
1161    ///  assert_eq!(v2.as_u64(), Some(u64::MAX));
1162    ///
1163    ///  // or a variant that decimal with scale not equal to zero
1164    /// let d = VariantDecimal16::try_new(123, 2).unwrap();
1165    ///  let v3 = Variant::from(d);
1166    ///  assert_eq!(v3.as_u64(), Some(1));
1167    ///
1168    /// // or from boolean variant
1169    /// let v4 = Variant::BooleanFalse;
1170    /// assert_eq!(v4.as_u64(), Some(0));
1171    ///
1172    ///  // but not a variant that can't fit into the range
1173    ///  let v5 = Variant::from(-1);
1174    ///  assert_eq!(v5.as_u64(), None);
1175    ///
1176    ///  // or not a variant that cannot be cast into an integer
1177    ///  let v6 = Variant::from("hello!");
1178    ///  assert_eq!(v6.as_u64(), None);
1179    /// ```
1180    pub fn as_u64(&self) -> Option<u64> {
1181        self.as_num()
1182    }
1183
1184    fn convert_string_to_decimal<D, VD>(input: &str) -> Option<VD>
1185    where
1186        D: DecimalType,
1187        VD: VariantDecimalType<Native = D::Native>,
1188        D::Native: NumCast + DecimalCast,
1189    {
1190        // find the last '.'
1191        let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len());
1192
1193        let scale = u8::try_from(scale_usize).ok()?;
1194
1195        let raw = parse_string_to_decimal_native::<D>(input, scale_usize).ok()?;
1196        VD::try_new(raw, scale).ok()
1197    }
1198
1199    /// Converts this variant to tuple with a 4-byte unscaled value if possible.
1200    ///
1201    /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
1202    /// fits in `i32` range,
1203    /// `None` for non-decimal variants or decimal values that would overflow.
1204    ///
1205    /// # Examples
1206    ///
1207    /// ```
1208    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
1209    ///
1210    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1211    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1212    /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1213    ///
1214    /// // and from larger decimal variants if they fit
1215    /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
1216    /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1217    ///
1218    /// // or from string variants if they can be parsed as decimals
1219    /// let v3 = Variant::from("123.45");
1220    /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok());
1221    ///
1222    /// // but not if the value would overflow i32
1223    /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
1224    /// assert_eq!(v4.as_decimal4(), None);
1225    ///
1226    /// // or if the variant is not a decimal
1227    /// let v5 = Variant::from("hello!");
1228    /// assert_eq!(v5.as_decimal4(), None);
1229    /// ```
1230    pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
1231        match *self {
1232            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1233                self.as_num::<i32>().and_then(|x| x.try_into().ok())
1234            }
1235            Variant::Float(f) => single_float_to_decimal::<Decimal32Type>(f as _, 1f64)
1236                .and_then(|x: i32| x.try_into().ok()),
1237            Variant::Double(f) => single_float_to_decimal::<Decimal32Type>(f, 1f64)
1238                .and_then(|x: i32| x.try_into().ok()),
1239            Variant::String(v) => Self::convert_string_to_decimal::<Decimal32Type, _>(v),
1240            Variant::ShortString(v) => {
1241                Self::convert_string_to_decimal::<Decimal32Type, _>(v.as_str())
1242            }
1243            Variant::Decimal4(decimal4) => Some(decimal4),
1244            Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
1245            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1246            _ => None,
1247        }
1248    }
1249
1250    /// Converts this variant to tuple with an 8-byte unscaled value if possible.
1251    ///
1252    /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
1253    /// fits in `i64` range, the scale will be 0 if the input is string variants.
1254    /// `None` for non-decimal variants or decimal values that would overflow.
1255    ///
1256    /// # Examples
1257    ///
1258    /// ```
1259    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
1260    ///
1261    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1262    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1263    /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1264    ///
1265    /// // and from larger decimal variants if they fit
1266    /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
1267    /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1268    ///
1269    /// // or from string variants if they can be parsed as decimals
1270    /// let v3 = Variant::from("123.45");
1271    /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok());
1272    ///
1273    /// // but not if the value would overflow i64
1274    /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
1275    /// assert_eq!(v4.as_decimal8(), None);
1276    ///
1277    /// // or if the variant is not a decimal
1278    /// let v5 = Variant::from("hello!");
1279    /// assert_eq!(v5.as_decimal8(), None);
1280    /// ```
1281    pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
1282        match *self {
1283            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1284                self.as_num::<i64>().and_then(|x| x.try_into().ok())
1285            }
1286            Variant::Float(f) => single_float_to_decimal::<Decimal64Type>(f as _, 1f64)
1287                .and_then(|x: i64| x.try_into().ok()),
1288            Variant::Double(f) => single_float_to_decimal::<Decimal64Type>(f, 1f64)
1289                .and_then(|x: i64| x.try_into().ok()),
1290            Variant::String(v) => Self::convert_string_to_decimal::<Decimal64Type, _>(v),
1291            Variant::ShortString(v) => {
1292                Self::convert_string_to_decimal::<Decimal64Type, _>(v.as_str())
1293            }
1294            Variant::Decimal4(decimal4) => Some(decimal4.into()),
1295            Variant::Decimal8(decimal8) => Some(decimal8),
1296            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1297            _ => None,
1298        }
1299    }
1300
1301    /// Converts this variant to tuple with a 16-byte unscaled value if possible.
1302    ///
1303    /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
1304    /// fits in `i128` range, the scale will be 0 if the input is string variants.
1305    /// `None` for non-decimal variants or decimal values that would overflow.
1306    ///
1307    /// # Examples
1308    ///
1309    /// ```
1310    /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
1311    ///
1312    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1313    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1314    /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
1315    ///
1316    /// // or from a string variant if it can be parsed as decimal
1317    /// let v2 = Variant::from("123.45");
1318    /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok());
1319    ///
1320    /// // but not if the variant is not a decimal
1321    /// let v3 = Variant::from("hello!");
1322    /// assert_eq!(v3.as_decimal16(), None);
1323    /// ```
1324    pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
1325        match *self {
1326            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1327                let x = self.as_num::<i64>()?;
1328                <i128 as From<i64>>::from(x).try_into().ok()
1329            }
1330            Variant::Float(f) => {
1331                single_float_to_decimal::<Decimal128Type>(<f64 as From<f32>>::from(f), 1f64)
1332                    .and_then(|x| x.try_into().ok())
1333            }
1334            Variant::Double(f) => {
1335                single_float_to_decimal::<Decimal128Type>(f, 1f64).and_then(|x| x.try_into().ok())
1336            }
1337            Variant::String(v) => Self::convert_string_to_decimal::<Decimal128Type, _>(v),
1338            Variant::ShortString(v) => {
1339                Self::convert_string_to_decimal::<Decimal128Type, _>(v.as_str())
1340            }
1341            Variant::Decimal4(decimal4) => Some(decimal4.into()),
1342            Variant::Decimal8(decimal8) => Some(decimal8.into()),
1343            Variant::Decimal16(decimal16) => Some(decimal16),
1344            _ => None,
1345        }
1346    }
1347
1348    /// Converts this variant to an `f16` if possible.
1349    ///
1350    /// Returns `Some(f16)` for boolean and numeric variants(integers, floating-point,
1351    /// and decimals with scale 0) that fit in `f16` range
1352    /// `None` otherwise.
1353    ///
1354    /// # Example
1355    ///
1356    /// ```
1357    /// use parquet_variant::Variant;
1358    /// use half::f16;
1359    ///
1360    /// // you can extract an f16 from a float variant
1361    /// let v1 = Variant::from(std::f32::consts::PI);
1362    /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
1363    ///
1364    /// // and from a double variant (with loss of precision to nearest f16)
1365    /// let v2 = Variant::from(std::f64::consts::PI);
1366    /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
1367    ///
1368    /// // and from boolean
1369    /// let v3 = Variant::BooleanTrue;
1370    /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0)));
1371    ///
1372    /// // return inf if overflow
1373    /// let v4 = Variant::from(123456);
1374    /// assert_eq!(v4.as_f16(), Some(f16::INFINITY));
1375    ///
1376    /// // but not from other variants
1377    /// let v5 = Variant::from("hello!");
1378    /// assert_eq!(v5.as_f16(), None);
1379    pub fn as_f16(&self) -> Option<f16> {
1380        self.as_num()
1381    }
1382
1383    /// Converts this variant to an `f32` if possible.
1384    ///
1385    /// Returns `Some(f32)` for boolean and numeric variants(integers, floating-point,
1386    /// and decimals with scale 0) that fit in `f32` range
1387    /// `None` otherwise.
1388    ///
1389    /// # Examples
1390    ///
1391    /// ```
1392    /// use parquet_variant::Variant;
1393    ///
1394    /// // you can extract an f32 from a float variant
1395    /// let v1 = Variant::from(std::f32::consts::PI);
1396    /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
1397    ///
1398    /// // and from a double variant (with loss of precision to nearest f32)
1399    /// let v2 = Variant::from(std::f64::consts::PI);
1400    /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
1401    ///
1402    /// // and from boolean variant
1403    /// let v3 = Variant::BooleanTrue;
1404    /// assert_eq!(v3.as_f32(), Some(1.0));
1405    ///
1406    /// // and return inf if overflow
1407    /// let v4 = Variant::from(f64::MAX);
1408    /// assert_eq!(v4.as_f32(), Some(f32::INFINITY));
1409    ///
1410    /// // but not from other variants
1411    /// let v5 = Variant::from("hello!");
1412    /// assert_eq!(v5.as_f32(), None);
1413    /// ```
1414    pub fn as_f32(&self) -> Option<f32> {
1415        self.as_num()
1416    }
1417
1418    /// Converts this variant to an `f64` if possible.
1419    ///
1420    /// Returns `Some(f64)` for boolean and numeric variants(integers, floating-point,
1421    /// and decimals with scale 0) that fit in `f64` range
1422    /// `None` for other variants or can't be represented by an f64.
1423    ///
1424    /// # Examples
1425    ///
1426    /// ```
1427    /// use parquet_variant::Variant;
1428    ///
1429    /// // you can extract an f64 from a float variant
1430    /// let v1 = Variant::from(std::f32::consts::PI);
1431    /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
1432    ///
1433    /// // and from a double variant
1434    /// let v2 = Variant::from(std::f64::consts::PI);
1435    /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
1436    ///
1437    /// // and from boolean variant
1438    /// let v3 = Variant::BooleanTrue;
1439    /// assert_eq!(v3.as_f64(), Some(1.0f64));
1440    ///
1441    /// // but not from other variants
1442    /// let v5 = Variant::from("hello!");
1443    /// assert_eq!(v5.as_f64(), None);
1444    /// ```
1445    pub fn as_f64(&self) -> Option<f64> {
1446        self.as_num()
1447    }
1448
1449    /// Converts this variant to an `Object` if it is an [`VariantObject`].
1450    ///
1451    /// Returns `Some(&VariantObject)` for object variants,
1452    /// `None` for non-object variants.
1453    ///
1454    /// See [`Self::get_path`] to dynamically traverse objects
1455    ///
1456    /// # Examples
1457    /// ```
1458    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1459    /// # let (metadata, value) = {
1460    /// # let mut builder = VariantBuilder::new();
1461    /// #   let mut obj = builder.new_object();
1462    /// #   obj.insert("name", "John");
1463    /// #   obj.finish();
1464    /// #   builder.finish()
1465    /// # };
1466    /// // object that is {"name": "John"}
1467    ///  let variant = Variant::new(&metadata, &value);
1468    /// // use the `as_object` method to access the object
1469    /// let obj = variant.as_object().expect("variant should be an object");
1470    /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1471    /// ```
1472    pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1473        if let Variant::Object(obj) = self {
1474            Some(obj)
1475        } else {
1476            None
1477        }
1478    }
1479
1480    /// If this is an object and the requested field name exists, retrieves the corresponding field
1481    /// value. Otherwise, returns None.
1482    ///
1483    /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1484    ///
1485    /// # Examples
1486    /// ```
1487    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1488    /// # let mut builder = VariantBuilder::new();
1489    /// # let mut obj = builder.new_object();
1490    /// # obj.insert("name", "John");
1491    /// # obj.finish();
1492    /// # let (metadata, value) = builder.finish();
1493    /// // object that is {"name": "John"}
1494    ///  let variant = Variant::new(&metadata, &value);
1495    /// // use the `get_object_field` method to access the object
1496    /// let obj = variant.get_object_field("name");
1497    /// assert_eq!(obj, Some(Variant::from("John")));
1498    /// let obj = variant.get_object_field("foo");
1499    /// assert!(obj.is_none());
1500    /// ```
1501    pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1502        match self {
1503            Variant::Object(object) => object.get(field_name),
1504            _ => None,
1505        }
1506    }
1507
1508    /// Converts this variant to a `List` if it is a [`VariantList`].
1509    ///
1510    /// Returns `Some(&VariantList)` for list variants,
1511    /// `None` for non-list variants.
1512    ///
1513    /// See [`Self::get_path`] to dynamically traverse lists
1514    ///
1515    /// # Examples
1516    /// ```
1517    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1518    /// # let (metadata, value) = {
1519    /// # let mut builder = VariantBuilder::new();
1520    /// #   let mut list = builder.new_list();
1521    /// #   list.append_value("John");
1522    /// #   list.append_value("Doe");
1523    /// #   list.finish();
1524    /// #   builder.finish()
1525    /// # };
1526    /// // list that is ["John", "Doe"]
1527    /// let variant = Variant::new(&metadata, &value);
1528    /// // use the `as_list` method to access the list
1529    /// let list = variant.as_list().expect("variant should be a list");
1530    /// assert_eq!(list.len(), 2);
1531    /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1532    /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1533    /// ```
1534    pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1535        if let Variant::List(list) = self {
1536            Some(list)
1537        } else {
1538            None
1539        }
1540    }
1541
1542    /// Converts this variant to a `NaiveTime` if possible.
1543    ///
1544    /// Returns `Some(NaiveTime)` for `Variant::Time`,
1545    /// `None` for non-Time variants.
1546    ///
1547    /// # Example
1548    ///
1549    /// ```
1550    /// use chrono::NaiveTime;
1551    /// use parquet_variant::Variant;
1552    ///
1553    /// // you can extract a `NaiveTime` from a `Variant::Time`
1554    /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1555    /// let v1 = Variant::from(time);
1556    /// assert_eq!(Some(time), v1.as_time_utc());
1557    ///
1558    /// // but not from other variants.
1559    /// let v2 = Variant::from("Hello");
1560    /// assert_eq!(None, v2.as_time_utc());
1561    /// ```
1562    pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1563        if let Variant::Time(time) = self {
1564            Some(*time)
1565        } else {
1566            None
1567        }
1568    }
1569
1570    /// If this is a list and the requested index is in bounds, retrieves the corresponding
1571    /// element. Otherwise, returns None.
1572    ///
1573    /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1574    ///
1575    /// # Examples
1576    /// ```
1577    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1578    /// # let mut builder = VariantBuilder::new();
1579    /// # let mut list = builder.new_list();
1580    /// # list.append_value("John");
1581    /// # list.append_value("Doe");
1582    /// # list.finish();
1583    /// # let (metadata, value) = builder.finish();
1584    /// // list that is ["John", "Doe"]
1585    /// let variant = Variant::new(&metadata, &value);
1586    /// // use the `get_list_element` method to access the list
1587    /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1588    /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1589    /// assert!(variant.get_list_element(2).is_none());
1590    /// ```
1591    pub fn get_list_element(&self, index: usize) -> Option<Self> {
1592        match self {
1593            Variant::List(list) => list.get(index),
1594            _ => None,
1595        }
1596    }
1597
1598    /// Return the metadata dictionary associated with this variant value.
1599    pub fn metadata(&self) -> &VariantMetadata<'m> {
1600        match self {
1601            Variant::Object(VariantObject { metadata, .. })
1602            | Variant::List(VariantList { metadata, .. }) => metadata,
1603            _ => &EMPTY_VARIANT_METADATA,
1604        }
1605    }
1606
1607    /// Return a new Variant with the path followed.
1608    ///
1609    /// If the path is not found, `None` is returned.
1610    ///
1611    /// # Example
1612    /// ```
1613    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1614    /// # let mut builder = VariantBuilder::new();
1615    /// # let mut obj = builder.new_object();
1616    /// # let mut list = obj.new_list("foo");
1617    /// # list.append_value("bar");
1618    /// # list.append_value("baz");
1619    /// # list.finish();
1620    /// # obj.finish();
1621    /// # let (metadata, value) = builder.finish();
1622    /// // given a variant like `{"foo": ["bar", "baz"]}`
1623    /// let variant = Variant::new(&metadata, &value);
1624    /// // Accessing a non existent path returns None
1625    /// assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), None);
1626    /// // Access obj["foo"]
1627    /// let path = VariantPath::try_from("foo").unwrap();
1628    /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1629    /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1630    /// // Access foo[0]
1631    /// let path = VariantPath::from(0);
1632    /// let bar = foo.get_path(&path).expect("element 0 should exist");
1633    /// // bar is a string
1634    /// assert_eq!(bar.as_string(), Some("bar"));
1635    /// // You can also access nested paths
1636    /// let path = VariantPath::try_from("foo").unwrap().join(0);
1637    /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1638    /// ```
1639    pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1640        path.iter()
1641            .try_fold(self.clone(), |output, element| match element {
1642                VariantPathElement::Field { name } => output.get_object_field(name),
1643                VariantPathElement::Index { index } => output.get_list_element(*index),
1644            })
1645    }
1646}
1647
1648impl From<()> for Variant<'_, '_> {
1649    fn from((): ()) -> Self {
1650        Variant::Null
1651    }
1652}
1653
1654impl From<bool> for Variant<'_, '_> {
1655    fn from(value: bool) -> Self {
1656        match value {
1657            true => Variant::BooleanTrue,
1658            false => Variant::BooleanFalse,
1659        }
1660    }
1661}
1662
1663impl From<i8> for Variant<'_, '_> {
1664    fn from(value: i8) -> Self {
1665        Variant::Int8(value)
1666    }
1667}
1668
1669impl From<i16> for Variant<'_, '_> {
1670    fn from(value: i16) -> Self {
1671        Variant::Int16(value)
1672    }
1673}
1674
1675impl From<i32> for Variant<'_, '_> {
1676    fn from(value: i32) -> Self {
1677        Variant::Int32(value)
1678    }
1679}
1680
1681impl From<i64> for Variant<'_, '_> {
1682    fn from(value: i64) -> Self {
1683        Variant::Int64(value)
1684    }
1685}
1686
1687impl From<u8> for Variant<'_, '_> {
1688    fn from(value: u8) -> Self {
1689        // if it fits in i8, use that, otherwise use i16
1690        if let Ok(value) = i8::try_from(value) {
1691            Variant::Int8(value)
1692        } else {
1693            Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible
1694        }
1695    }
1696}
1697
1698impl From<u16> for Variant<'_, '_> {
1699    fn from(value: u16) -> Self {
1700        // if it fits in i16, use that, otherwise use i32
1701        if let Ok(value) = i16::try_from(value) {
1702            Variant::Int16(value)
1703        } else {
1704            Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible
1705        }
1706    }
1707}
1708impl From<u32> for Variant<'_, '_> {
1709    fn from(value: u32) -> Self {
1710        // if it fits in i32, use that, otherwise use i64
1711        if let Ok(value) = i32::try_from(value) {
1712            Variant::Int32(value)
1713        } else {
1714            Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible
1715        }
1716    }
1717}
1718
1719impl From<u64> for Variant<'_, '_> {
1720    fn from(value: u64) -> Self {
1721        // if it fits in i64, use that, otherwise use Decimal16
1722        if let Ok(value) = i64::try_from(value) {
1723            Variant::Int64(value)
1724        } else {
1725            // u64 max is 18446744073709551615, which fits in i128
1726            Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap())
1727        }
1728    }
1729}
1730
1731impl From<VariantDecimal4> for Variant<'_, '_> {
1732    fn from(value: VariantDecimal4) -> Self {
1733        Variant::Decimal4(value)
1734    }
1735}
1736
1737impl From<VariantDecimal8> for Variant<'_, '_> {
1738    fn from(value: VariantDecimal8) -> Self {
1739        Variant::Decimal8(value)
1740    }
1741}
1742
1743impl From<VariantDecimal16> for Variant<'_, '_> {
1744    fn from(value: VariantDecimal16) -> Self {
1745        Variant::Decimal16(value)
1746    }
1747}
1748
1749impl From<half::f16> for Variant<'_, '_> {
1750    fn from(value: half::f16) -> Self {
1751        Variant::Float(value.into())
1752    }
1753}
1754
1755impl From<f32> for Variant<'_, '_> {
1756    fn from(value: f32) -> Self {
1757        Variant::Float(value)
1758    }
1759}
1760
1761impl From<f64> for Variant<'_, '_> {
1762    fn from(value: f64) -> Self {
1763        Variant::Double(value)
1764    }
1765}
1766
1767impl From<NaiveDate> for Variant<'_, '_> {
1768    fn from(value: NaiveDate) -> Self {
1769        Variant::Date(value)
1770    }
1771}
1772
1773impl From<DateTime<Utc>> for Variant<'_, '_> {
1774    fn from(value: DateTime<Utc>) -> Self {
1775        if value.nanosecond() % 1000 > 0 {
1776            Variant::TimestampNanos(value)
1777        } else {
1778            Variant::TimestampMicros(value)
1779        }
1780    }
1781}
1782
1783impl From<NaiveDateTime> for Variant<'_, '_> {
1784    fn from(value: NaiveDateTime) -> Self {
1785        if value.nanosecond() % 1000 > 0 {
1786            Variant::TimestampNtzNanos(value)
1787        } else {
1788            Variant::TimestampNtzMicros(value)
1789        }
1790    }
1791}
1792
1793impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1794    fn from(value: &'v [u8]) -> Self {
1795        Variant::Binary(value)
1796    }
1797}
1798
1799impl From<NaiveTime> for Variant<'_, '_> {
1800    fn from(value: NaiveTime) -> Self {
1801        Variant::Time(value)
1802    }
1803}
1804
1805impl From<Uuid> for Variant<'_, '_> {
1806    fn from(value: Uuid) -> Self {
1807        Variant::Uuid(value)
1808    }
1809}
1810
1811impl<'v> From<&'v str> for Variant<'_, 'v> {
1812    fn from(value: &'v str) -> Self {
1813        if value.len() > MAX_SHORT_STRING_BYTES {
1814            Variant::String(value)
1815        } else {
1816            Variant::ShortString(ShortString(value))
1817        }
1818    }
1819}
1820
1821impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1822    type Error = ArrowError;
1823
1824    fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1825        Ok(Variant::Decimal4(VariantDecimal4::try_new(
1826            value.0, value.1,
1827        )?))
1828    }
1829}
1830
1831impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1832    type Error = ArrowError;
1833
1834    fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1835        Ok(Variant::Decimal8(VariantDecimal8::try_new(
1836            value.0, value.1,
1837        )?))
1838    }
1839}
1840
1841impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1842    type Error = ArrowError;
1843
1844    fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1845        Ok(Variant::Decimal16(VariantDecimal16::try_new(
1846            value.0, value.1,
1847        )?))
1848    }
1849}
1850
// Debug-only placeholder for entries of a VariantObject or VariantList that
// could not be decoded: it prints as <invalid> (bare, not as the quoted
// string "<invalid>").
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1859
// Debug-only wrapper that renders binary data as lowercase, two-digit hex
// byte values separated by single spaces (nothing at all for an empty slice).
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, byte) in self.0.iter().enumerate() {
            // The separator goes between bytes, never before the first one
            if position != 0 {
                f.write_str(" ")?;
            }
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}
1874
1875impl std::fmt::Debug for Variant<'_, '_> {
1876    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1877        match self {
1878            Variant::Null => write!(f, "Null"),
1879            Variant::BooleanTrue => write!(f, "BooleanTrue"),
1880            Variant::BooleanFalse => write!(f, "BooleanFalse"),
1881            Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1882            Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1883            Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1884            Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1885            Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1886            Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1887            Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1888            Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1889            Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1890            Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1891            Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1892            Variant::TimestampNtzMicros(ts) => {
1893                f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1894            }
1895            Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1896            Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1897            Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1898            Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1899            Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1900            Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1901            Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1902            Variant::Object(obj) => {
1903                let mut map = f.debug_map();
1904                for res in obj.iter_try() {
1905                    match res {
1906                        Ok((k, v)) => map.entry(&k, &v),
1907                        Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1908                    };
1909                }
1910                map.finish()
1911            }
1912            Variant::List(arr) => {
1913                let mut list = f.debug_list();
1914                for res in arr.iter_try() {
1915                    match res {
1916                        Ok(v) => list.entry(&v),
1917                        Err(_) => list.entry(&InvalidVariant),
1918                    };
1919                }
1920                list.finish()
1921            }
1922        }
1923    }
1924}
1925
1926#[cfg(test)]
1927mod tests {
1928
1929    use super::*;
1930
1931    #[test]
1932    fn test_empty_variant_will_fail() {
1933        let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();
1934
1935        let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();
1936
1937        assert!(matches!(
1938            err,
1939            ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
1940    }
1941
1942    #[test]
1943    fn test_construct_short_string() {
1944        let short_string = ShortString::try_new("norm").expect("should fit in short string");
1945        assert_eq!(short_string.as_str(), "norm");
1946
1947        let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
1948        let res = ShortString::try_new(&long_string);
1949        assert!(res.is_err());
1950    }
1951
1952    #[test]
1953    fn test_variant_decimal_conversion() {
1954        let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
1955        let variant = Variant::from(decimal4);
1956        assert_eq!(variant.as_decimal4(), Some(decimal4));
1957
1958        let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
1959        let variant = Variant::from(decimal8);
1960        assert_eq!(variant.as_decimal8(), Some(decimal8));
1961
1962        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
1963        let variant = Variant::from(decimal16);
1964        assert_eq!(variant.as_decimal16(), Some(decimal16));
1965    }
1966
1967    #[test]
1968    fn test_variant_all_subtypes_debug() {
1969        use crate::VariantBuilder;
1970
1971        let mut builder = VariantBuilder::new();
1972
1973        // Create a root object that contains one of every variant subtype
1974        let mut root_obj = builder.new_object();
1975
1976        // Add primitive types
1977        root_obj.insert("null", ());
1978        root_obj.insert("boolean_true", true);
1979        root_obj.insert("boolean_false", false);
1980        root_obj.insert("int8", 42i8);
1981        root_obj.insert("int16", 1234i16);
1982        root_obj.insert("int32", 123456i32);
1983        root_obj.insert("int64", 1234567890123456789i64);
1984        root_obj.insert("float", 1.234f32);
1985        root_obj.insert("double", 1.23456789f64);
1986
1987        // Add date and timestamp types
1988        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
1989        root_obj.insert("date", date);
1990
1991        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1992            .unwrap()
1993            .and_hms_milli_opt(15, 30, 45, 123)
1994            .unwrap()
1995            .and_utc();
1996        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));
1997
1998        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1999            .unwrap()
2000            .and_hms_milli_opt(15, 30, 45, 123)
2001            .unwrap();
2002        root_obj.insert(
2003            "timestamp_ntz_micros",
2004            Variant::TimestampNtzMicros(timestamp_ntz),
2005        );
2006
2007        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
2008            .unwrap()
2009            .and_hms_nano_opt(12, 3, 4, 123456789)
2010            .unwrap()
2011            .and_utc();
2012        root_obj.insert(
2013            "timestamp_nanos",
2014            Variant::TimestampNanos(timestamp_nanos_utc),
2015        );
2016
2017        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
2018            .unwrap()
2019            .and_hms_nano_opt(12, 3, 4, 123456789)
2020            .unwrap();
2021        root_obj.insert(
2022            "timestamp_ntz_nanos",
2023            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
2024        );
2025
2026        // Add decimal types
2027        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
2028        root_obj.insert("decimal4", decimal4);
2029
2030        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
2031        root_obj.insert("decimal8", decimal8);
2032
2033        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
2034        root_obj.insert("decimal16", decimal16);
2035
2036        // Add binary and string types
2037        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
2038        root_obj.insert("binary", binary_data.as_slice());
2039
2040        let long_string =
2041            "This is a long string that exceeds the short string limit and contains emoji 🦀";
2042        root_obj.insert("string", long_string);
2043        root_obj.insert("short_string", "Short string with emoji 🎉");
2044        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
2045        root_obj.insert("time", time);
2046
2047        // Add uuid
2048        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
2049        root_obj.insert("uuid", Variant::Uuid(uuid));
2050
2051        // Add nested object
2052        let mut nested_obj = root_obj.new_object("nested_object");
2053        nested_obj.insert("inner_key1", "inner_value1");
2054        nested_obj.insert("inner_key2", 999i32);
2055        nested_obj.finish();
2056
2057        // Add list with mixed types
2058        let mut mixed_list = root_obj.new_list("mixed_list");
2059        mixed_list.append_value(1i32);
2060        mixed_list.append_value("two");
2061        mixed_list.append_value(true);
2062        mixed_list.append_value(4.0f32);
2063        mixed_list.append_value(());
2064
2065        // Add nested list inside the mixed list
2066        let mut nested_list = mixed_list.new_list();
2067        nested_list.append_value("nested");
2068        nested_list.append_value(10i8);
2069        nested_list.finish();
2070
2071        mixed_list.finish();
2072
2073        root_obj.finish();
2074
2075        let (metadata, value) = builder.finish();
2076        let variant = Variant::try_new(&metadata, &value).unwrap();
2077
2078        // Test Debug formatter (?)
2079        let debug_output = format!("{:?}", variant);
2080
2081        // Verify that the debug output contains all the expected types
2082        assert!(debug_output.contains("\"null\": Null"));
2083        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
2084        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
2085        assert!(debug_output.contains("\"int8\": Int8(42)"));
2086        assert!(debug_output.contains("\"int16\": Int16(1234)"));
2087        assert!(debug_output.contains("\"int32\": Int32(123456)"));
2088        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
2089        assert!(debug_output.contains("\"float\": Float(1.234)"));
2090        assert!(debug_output.contains("\"double\": Double(1.23456789"));
2091        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
2092        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
2093        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
2094        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
2095        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
2096        assert!(debug_output.contains("\"decimal4\": Decimal4("));
2097        assert!(debug_output.contains("\"decimal8\": Decimal8("));
2098        assert!(debug_output.contains("\"decimal16\": Decimal16("));
2099        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
2100        assert!(debug_output.contains("\"string\": String("));
2101        assert!(debug_output.contains("\"short_string\": ShortString("));
2102        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
2103        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
2104        assert!(debug_output.contains("\"nested_object\":"));
2105        assert!(debug_output.contains("\"mixed_list\":"));
2106
2107        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
2108        assert_eq!(debug_output, expected);
2109
2110        // Test alternate Debug formatter (#?)
2111        let alt_debug_output = format!("{:#?}", variant);
2112        let expected = r#"{
2113    "binary": Binary(01 02 03 04 de ad be ef),
2114    "boolean_false": BooleanFalse,
2115    "boolean_true": BooleanTrue,
2116    "date": Date(
2117        2024-12-25,
2118    ),
2119    "decimal16": Decimal16(
2120        VariantDecimal16 {
2121            integer: 123456789012345678901234567890,
2122            scale: 4,
2123        },
2124    ),
2125    "decimal4": Decimal4(
2126        VariantDecimal4 {
2127            integer: 1234,
2128            scale: 2,
2129        },
2130    ),
2131    "decimal8": Decimal8(
2132        VariantDecimal8 {
2133            integer: 123456789,
2134            scale: 3,
2135        },
2136    ),
2137    "double": Double(
2138        1.23456789,
2139    ),
2140    "float": Float(
2141        1.234,
2142    ),
2143    "int16": Int16(
2144        1234,
2145    ),
2146    "int32": Int32(
2147        123456,
2148    ),
2149    "int64": Int64(
2150        1234567890123456789,
2151    ),
2152    "int8": Int8(
2153        42,
2154    ),
2155    "mixed_list": [
2156        Int32(
2157            1,
2158        ),
2159        ShortString(
2160            ShortString(
2161                "two",
2162            ),
2163        ),
2164        BooleanTrue,
2165        Float(
2166            4.0,
2167        ),
2168        Null,
2169        [
2170            ShortString(
2171                ShortString(
2172                    "nested",
2173                ),
2174            ),
2175            Int8(
2176                10,
2177            ),
2178        ],
2179    ],
2180    "nested_object": {
2181        "inner_key1": ShortString(
2182            ShortString(
2183                "inner_value1",
2184            ),
2185        ),
2186        "inner_key2": Int32(
2187            999,
2188        ),
2189    },
2190    "null": Null,
2191    "short_string": ShortString(
2192        ShortString(
2193            "Short string with emoji 🎉",
2194        ),
2195    ),
2196    "string": String(
2197        "This is a long string that exceeds the short string limit and contains emoji 🦀",
2198    ),
2199    "time": Time(
2200        01:02:03.000004,
2201    ),
2202    "timestamp_micros": TimestampMicros(
2203        2024-12-25T15:30:45.123Z,
2204    ),
2205    "timestamp_nanos": TimestampNanos(
2206        2025-08-15T12:03:04.123456789Z,
2207    ),
2208    "timestamp_ntz_micros": TimestampNtzMicros(
2209        2024-12-25T15:30:45.123,
2210    ),
2211    "timestamp_ntz_nanos": TimestampNtzNanos(
2212        2025-08-15T12:03:04.123456789,
2213    ),
2214    "uuid": Uuid(
2215        67e55044-10b1-426f-9247-bb680e5fe0c8,
2216    ),
2217}"#;
2218        assert_eq!(alt_debug_output, expected);
2219    }
2220}