Skip to main content

parquet_variant/
variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType};
19pub use self::list::VariantList;
20pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
21pub use self::object::VariantObject;
22
23// Publicly export types used in the API
24pub use half::f16;
25pub use uuid::Uuid;
26
27use crate::decoder::{
28    self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
29};
30use crate::path::{VariantPath, VariantPathElement};
31use crate::utils::{first_byte_from_slice, slice_from_slice};
32use arrow::compute::{
33    cast_num_to_bool, cast_single_string_to_boolean_default, num_cast, single_bool_to_numeric,
34};
35use arrow_schema::ArrowError;
36use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
37use num_traits::NumCast;
38use std::ops::Deref;
39
40mod decimal;
41mod list;
42mod metadata;
43mod object;
44
// Maximum length, in bytes, of a Variant short string (`0x3F` = 63).
45const MAX_SHORT_STRING_BYTES: usize = 0x3F;
46
47/// A Variant [`ShortString`]
48///
49/// This implementation is a zero-cost wrapper over `&str` that ensures
50/// the length of the underlying string is a valid Variant short string (63 bytes or less).
///
/// The length limit is enforced by [`ShortString::try_new`]; the wrapped field is
/// `pub(crate)`, so code outside this crate has to go through that constructor.
51#[derive(Debug, Clone, Copy, PartialEq)]
52pub struct ShortString<'a>(pub(crate) &'a str);
53
54impl<'a> ShortString<'a> {
55    /// Attempts to interpret `value` as a variant short string value.
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if  `value` is longer than the maximum allowed length
60    /// of a Variant short string (63 bytes).
61    pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
62        if value.len() > MAX_SHORT_STRING_BYTES {
63            return Err(ArrowError::InvalidArgumentError(format!(
64                "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
65            )));
66        }
67
68        Ok(Self(value))
69    }
70
71    /// Returns the underlying Variant short string as a &str
72    pub fn as_str(&self) -> &'a str {
73        self.0
74    }
75}
76
77impl<'a> From<ShortString<'a>> for &'a str {
78    fn from(value: ShortString<'a>) -> Self {
79        value.0
80    }
81}
82
83impl<'a> TryFrom<&'a str> for ShortString<'a> {
84    type Error = ArrowError;
85
86    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
87        Self::try_new(value)
88    }
89}
90
91impl AsRef<str> for ShortString<'_> {
92    fn as_ref(&self) -> &str {
93        self.0
94    }
95}
96
97impl Deref for ShortString<'_> {
98    type Target = str;
99
100    fn deref(&self) -> &Self::Target {
101        self.0
102    }
103}
104
105/// Represents a [Parquet Variant]
106///
107/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
108///
109/// # Background
110///
111/// The [specification] says:
112///
113/// The Variant Binary Encoding allows representation of semi-structured data
114/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
115/// intended to allow efficient access to nested data even in the presence of
116/// very wide or deep structures.
117///
118/// Another motivation for the representation is that (aside from metadata) each
119/// nested Variant value is contiguous and self-contained. For example, in a
120/// Variant containing an Array of Variant values, the representation of an
121/// inner Variant value, when paired with the metadata of the full variant, is
122/// itself a valid Variant.
123///
124/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
125/// refers to extracting some elements of the variant into separate columns for
126/// more efficient extraction/filter pushdown. The [Variant Shredding
127/// specification] describes the details of shredding Variant values as typed
128/// Parquet columns.
129///
130/// A Variant represents a type that contains one of:
131///
132/// * Primitive: A type and corresponding value (e.g. INT, STRING)
133///
134/// * Array: An ordered list of Variant values
135///
136/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
137///   pairs). An object may not contain duplicate keys.
138///
139/// # Encoding
140///
141/// A Variant is encoded with 2 binary values, the value and the metadata. The
142/// metadata stores a header and an optional dictionary of field names which are
143/// referred to by offset in the value. The value is a binary representation of
144/// the actual data, and varies depending on the type.
145///
146/// # Design Goals
147///
148/// The design goals of the Rust API are as follows:
149/// 1. Speed / Zero copy access (no `clone`ing is required)
150/// 2. Safety
151/// 3. Follow standard Rust conventions
152///
153/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
154/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
155/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
156///
157/// # Casting Semantics
158///
159/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable.
160/// Conversions in this module delegate to Arrow compute cast helpers such as
161/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and
162/// [`cast_single_string_to_boolean_default`].
163///
164/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants.
165///   Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows
166///   Arrow UTF8-to-boolean cast rules.
167/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`],
168///   [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept
169///   boolean and numeric variants (integers, floating-point, and decimals with scale `0`).
170///   They return `None` when conversion is not possible.
171/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and
172///   [`Self::as_decimal16`] accept compatible decimal variants and integer variants.
173///   They return `None` when conversion is not possible.
174///
175/// # Examples
176///
177/// ## Creating `Variant` from Rust Types
178/// ```
179/// use parquet_variant::Variant;
180/// // variants can be directly constructed
181/// let variant = Variant::Int32(123);
182/// // or constructed via `From` impls
183/// assert_eq!(variant, Variant::from(123i32));
184/// ```
185/// ## Creating `Variant` from metadata and value
186/// ```
187/// # use parquet_variant::{Variant, VariantMetadata};
188/// let metadata = [0x01, 0x00, 0x00];
189/// let value = [0x09, 0x48, 0x49];
190/// // parse the header metadata
191/// assert_eq!(
192///   Variant::from("HI"),
193///   Variant::new(&metadata, &value)
194/// );
195/// ```
196///
197/// ## Using `Variant` values
198/// ```
199/// # use parquet_variant::Variant;
200/// # let variant = Variant::Int32(123);
201/// // variants can be used in match statements like normal enums
202/// match variant {
203///   Variant::Int32(i) => println!("Integer: {}", i),
204///   Variant::String(s) => println!("String: {}", s),
205///   _ => println!("Other variant"),
206/// }
207/// ```
208///
209/// # Validation
210///
211/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
212/// underlying bytes are a valid encoding of a variant value (see below).
213///
214/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
215/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
216/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
217/// `v` are the number of bytes in the metadata and value buffers, respectively.
218///
219/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
220/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
221/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
222/// fallible accessors are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
223/// used to _validate_ an _unvalidated_ instance, if desired.
224///
225/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
226/// knows the underlying bytes were already validated previously, or if the caller intends to
227/// perform a small number of (fallible) accesses to a large variant value.
228///
229/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
230/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
231///
232/// # Safety
233///
234/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
235/// infallible methods may cause panics but will never lead to undefined behavior.
236///
237/// [metadata]: VariantMetadata#Validation
238/// [object]: VariantObject#Validation
239/// [array]: VariantList#Validation
240#[derive(Clone, PartialEq)]
241pub enum Variant<'m, 'v> {
    // NOTE(review): the `type_id` values quoted in the variant docs below look off by one
    // relative to the spec's basic-type ids (presumably 0 = primitive, 1 = short string,
    // 2 = object, 3 = array) — confirm against VariantEncoding.md before relying on them.
242    /// Primitive type: Null
243    Null,
244    /// Primitive (type_id=1): INT(8, SIGNED)
245    Int8(i8),
246    /// Primitive (type_id=1): INT(16, SIGNED)
247    Int16(i16),
248    /// Primitive (type_id=1): INT(32, SIGNED)
249    Int32(i32),
250    /// Primitive (type_id=1): INT(64, SIGNED)
251    Int64(i64),
252    /// Primitive (type_id=1): DATE
253    Date(NaiveDate),
254    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
255    TimestampMicros(DateTime<Utc>),
256    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
257    TimestampNtzMicros(NaiveDateTime),
258    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
259    TimestampNanos(DateTime<Utc>),
260    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
261    TimestampNtzNanos(NaiveDateTime),
262    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
263    Decimal4(VariantDecimal4),
264    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
265    Decimal8(VariantDecimal8),
266    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
267    Decimal16(VariantDecimal16),
268    /// Primitive (type_id=1): FLOAT
269    Float(f32),
270    /// Primitive (type_id=1): DOUBLE
271    Double(f64),
272    /// Primitive (type_id=1): BOOLEAN (true)
273    BooleanTrue,
274    /// Primitive (type_id=1): BOOLEAN (false)
275    BooleanFalse,
276    // Note: the variants below borrow only from the *value* buffer (lifetime `'v`)
277    /// Primitive (type_id=1): BINARY
278    Binary(&'v [u8]),
279    /// Primitive (type_id=1): STRING
280    String(&'v str),
281    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
282    Time(NaiveTime),
283    /// Primitive (type_id=1): UUID
284    Uuid(Uuid),
285    /// Short String (type_id=2): STRING
286    ShortString(ShortString<'v>),
287    // Objects and arrays need both the metadata (`'m`) and value (`'v`) buffers
288    /// Object (type_id=3): N/A
289    Object(VariantObject<'m, 'v>),
290    /// Array (type_id=4): N/A
291    List(VariantList<'m, 'v>),
292}
293
294// We don't want `Variant` to grow because it could hurt performance of a frequently-created type.
// NOTE(review): `expect_size_of` is presumably a const-evaluated check that the type is
// exactly 80 bytes — confirm in `crate::utils`.
295const _: () = crate::utils::expect_size_of::<Variant>(80);
296
297impl<'m, 'v> Variant<'m, 'v> {
298    /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
299    ///
300    /// The instance is fully [validated].
301    ///
302    /// # Example
303    /// ```
304    /// use parquet_variant::{Variant, VariantMetadata};
305    /// let metadata = [0x01, 0x00, 0x00];
306    /// let value = [0x09, 0x48, 0x49];
307    /// // parse the header metadata
308    /// assert_eq!(
309    ///   Variant::from("HI"),
310    ///   Variant::try_new(&metadata, &value).unwrap()
311    /// );
312    /// ```
313    ///
314    /// [validated]: Self#Validation
315    pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
316        let metadata = VariantMetadata::try_new(metadata)?;
317        Self::try_new_with_metadata(metadata, value)
318    }
319
320    /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
321    ///
322    /// The instance is [unvalidated].
323    ///
324    /// # Example
325    /// ```
326    /// use parquet_variant::{Variant, VariantMetadata};
327    /// let metadata = [0x01, 0x00, 0x00];
328    /// let value = [0x09, 0x48, 0x49];
329    /// // parse the header metadata
330    /// assert_eq!(
331    ///   Variant::from("HI"),
332    ///   Variant::new(&metadata, &value)
333    /// );
334    /// ```
335    ///
336    /// [unvalidated]: Self#Validation
337    pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
338        let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
339            .expect("Invalid variant metadata");
340        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
341            .expect("Invalid variant data")
342    }
343
344    /// Create a new variant with existing metadata.
345    ///
346    /// The instance is fully [validated].
347    ///
348    /// # Example
349    /// ```
350    /// # use parquet_variant::{Variant, VariantMetadata};
351    /// let metadata = [0x01, 0x00, 0x00];
352    /// let value = [0x09, 0x48, 0x49];
353    /// // parse the header metadata first
354    /// let metadata = VariantMetadata::new(&metadata);
355    /// assert_eq!(
356    ///   Variant::from("HI"),
357    ///   Variant::try_new_with_metadata(metadata, &value).unwrap()
358    /// );
359    /// ```
360    ///
361    /// [validated]: Self#Validation
362    pub fn try_new_with_metadata(
363        metadata: VariantMetadata<'m>,
364        value: &'v [u8],
365    ) -> Result<Self, ArrowError> {
366        Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
367    }
368
369    /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
370    ///
371    /// [unvalidated]: Self#Validation
372    pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
373        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
374            .expect("Invalid variant")
375    }
376
377    // The actual constructor, which only performs shallow (constant-time) validation.
378    fn try_new_with_metadata_and_shallow_validation(
379        metadata: VariantMetadata<'m>,
380        value: &'v [u8],
381    ) -> Result<Self, ArrowError> {
382        let value_metadata = first_byte_from_slice(value)?;
383        let value_data = slice_from_slice(value, 1..)?;
384        let new_self = match get_basic_type(value_metadata) {
385            VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
386                VariantPrimitiveType::Null => Variant::Null,
387                VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
388                VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
389                VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
390                VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
391                VariantPrimitiveType::Decimal4 => {
392                    let (integer, scale) = decoder::decode_decimal4(value_data)?;
393                    Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
394                }
395                VariantPrimitiveType::Decimal8 => {
396                    let (integer, scale) = decoder::decode_decimal8(value_data)?;
397                    Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
398                }
399                VariantPrimitiveType::Decimal16 => {
400                    let (integer, scale) = decoder::decode_decimal16(value_data)?;
401                    Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
402                }
403                VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
404                VariantPrimitiveType::Double => {
405                    Variant::Double(decoder::decode_double(value_data)?)
406                }
407                VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
408                VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
409                VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
410                VariantPrimitiveType::TimestampMicros => {
411                    Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
412                }
413                VariantPrimitiveType::TimestampNtzMicros => {
414                    Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
415                }
416                VariantPrimitiveType::TimestampNanos => {
417                    Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
418                }
419                VariantPrimitiveType::TimestampNtzNanos => {
420                    Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
421                }
422                VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
423                VariantPrimitiveType::Binary => {
424                    Variant::Binary(decoder::decode_binary(value_data)?)
425                }
426                VariantPrimitiveType::String => {
427                    Variant::String(decoder::decode_long_string(value_data)?)
428                }
429                VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
430            },
431            VariantBasicType::ShortString => {
432                Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
433            }
434            VariantBasicType::Object => Variant::Object(
435                VariantObject::try_new_with_shallow_validation(metadata, value)?,
436            ),
437            VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
438                metadata, value,
439            )?),
440        };
441        Ok(new_self)
442    }
443
444    /// True if this variant instance has already been [validated].
445    ///
446    /// [validated]: Self#Validation
447    pub fn is_fully_validated(&self) -> bool {
448        match self {
449            Variant::List(list) => list.is_fully_validated(),
450            Variant::Object(obj) => obj.is_fully_validated(),
451            _ => true,
452        }
453    }
454
455    /// Recursively validates this variant value, ensuring that infallible access will not panic due
456    /// to invalid bytes.
457    ///
458    /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
459    /// constructed in unvalidated (and potentially invalid) state.
460    ///
461    /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
462    /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
463    ///
464    /// [objects]: VariantObject#Validation
465    /// [arrays]: VariantList#Validation
466    pub fn with_full_validation(self) -> Result<Self, ArrowError> {
467        use Variant::*;
468        match self {
469            List(list) => list.with_full_validation().map(List),
470            Object(obj) => obj.with_full_validation().map(Object),
471            _ => Ok(self),
472        }
473    }
474
475    /// Converts this variant to `()` if it is null.
476    ///
477    /// Returns `Some(())` for null variants,
478    /// `None` for non-null variants.
479    ///
480    /// # Examples
481    ///
482    /// ```
483    /// use parquet_variant::Variant;
484    ///
485    /// // you can extract `()` from a null variant
486    /// let v1 = Variant::from(());
487    /// assert_eq!(v1.as_null(), Some(()));
488    ///
489    /// // but not from other variants
490    /// let v2 = Variant::from("hello!");
491    /// assert_eq!(v2.as_null(), None);
492    /// ```
493    pub fn as_null(&self) -> Option<()> {
494        matches!(self, Variant::Null).then_some(())
495    }
496
497    /// Converts this variant to a `bool` if possible.
498    ///
499    /// Returns `Some(bool)` for boolean, numeric and string variants,
500    /// `None` for non-boolean variants.
501    ///
502    /// # Examples
503    ///
504    /// ```
505    /// use parquet_variant::Variant;
506    ///
507    /// // you can extract a bool from the true variant
508    /// let v1 = Variant::from(true);
509    /// assert_eq!(v1.as_boolean(), Some(true));
510    ///
511    /// // and the false variant
512    /// let v2 = Variant::from(false);
513    /// assert_eq!(v2.as_boolean(), Some(false));
514    ///
515    /// // and a numeric variant
516    /// let v3 = Variant::from(3);
517    /// assert_eq!(v3.as_boolean(), Some(true));
518    ///
519    /// // and a string variant
520    /// let v4 = Variant::from("true");
521    /// assert_eq!(v4.as_boolean(), Some(true));
522    ///
523    /// // but not from other variants
524    /// let v5 = Variant::from("hello!");
525    /// assert_eq!(v5.as_boolean(), None);
526    /// ```
527    pub fn as_boolean(&self) -> Option<bool> {
528        match self {
529            Variant::BooleanTrue => Some(true),
530            Variant::BooleanFalse => Some(false),
531            Variant::Int8(i) => Some(cast_num_to_bool(*i)),
532            Variant::Int16(i) => Some(cast_num_to_bool(*i)),
533            Variant::Int32(i) => Some(cast_num_to_bool(*i)),
534            Variant::Int64(i) => Some(cast_num_to_bool(*i)),
535            Variant::Float(f) => Some(cast_num_to_bool(*f)),
536            Variant::Double(d) => Some(cast_num_to_bool(*d)),
537            Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()),
538            Variant::String(s) => cast_single_string_to_boolean_default(s),
539            _ => None,
540        }
541    }
542
543    /// Converts this variant to a `NaiveDate` if possible.
544    ///
545    /// Returns `Some(NaiveDate)` for date variants,
546    /// `None` for non-date variants.
547    ///
548    /// # Examples
549    ///
550    /// ```
551    /// use parquet_variant::Variant;
552    /// use chrono::NaiveDate;
553    ///
554    /// // you can extract a NaiveDate from a date variant
555    /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
556    /// let v1 = Variant::from(date);
557    /// assert_eq!(v1.as_naive_date(), Some(date));
558    ///
559    /// // but not from other variants
560    /// let v2 = Variant::from("hello!");
561    /// assert_eq!(v2.as_naive_date(), None);
562    /// ```
563    pub fn as_naive_date(&self) -> Option<NaiveDate> {
564        if let Variant::Date(d) = self {
565            Some(*d)
566        } else {
567            None
568        }
569    }
570
571    /// Converts this variant to a `DateTime<Utc>` if possible.
572    ///
573    /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
574    /// `None` for other variants.
575    ///
576    /// # Examples
577    ///
578    /// ```
579    /// use parquet_variant::Variant;
580    /// use chrono::NaiveDate;
581    ///
582    /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
583    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
584    ///     .unwrap()
585    ///     .and_hms_milli_opt(12, 34, 56, 780)
586    ///     .unwrap()
587    ///     .and_utc();
588    /// let v1 = Variant::from(datetime);
589    /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
590    ///
591    /// // but not for other variants.
592    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
593    ///     .unwrap()
594    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
595    ///     .unwrap()
596    ///     .and_utc();
597    /// let v2 = Variant::from(datetime_nanos);
598    /// assert_eq!(v2.as_timestamp_micros(), None);
599    /// ```
600    pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
601        match *self {
602            Variant::TimestampMicros(d) => Some(d),
603            _ => None,
604        }
605    }
606
607    /// Converts this variant to a `NaiveDateTime` if possible.
608    ///
609    /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
610    /// `None` for other variants.
611    ///
612    /// # Examples
613    ///
614    /// ```
615    /// use parquet_variant::Variant;
616    /// use chrono::NaiveDate;
617    ///
618    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
619    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
620    ///     .unwrap()
621    ///     .and_hms_milli_opt(12, 34, 56, 780)
622    ///     .unwrap();
623    /// let v1 = Variant::from(datetime);
624    /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
625    ///
626    /// // but not for other variants.
627    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
628    ///     .unwrap()
629    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
630    ///     .unwrap();
631    /// let v2 = Variant::from(datetime_nanos);
632    /// assert_eq!(v2.as_timestamp_micros(), None);
633    /// ```
634    pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
635        match *self {
636            Variant::TimestampNtzMicros(d) => Some(d),
637            _ => None,
638        }
639    }
640
641    /// Converts this variant to a `DateTime<Utc>` if possible.
642    ///
643    /// Returns `Some(DateTime<Utc>)` for timestamp variants,
644    /// `None` for other variants.
645    ///
646    /// # Examples
647    ///
648    /// ```
649    /// use parquet_variant::Variant;
650    /// use chrono::NaiveDate;
651    ///
652    /// // you can extract a DateTime<Utc> from a UTC-adjusted nanosecond-precision variant
653    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
654    ///     .unwrap()
655    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
656    ///     .unwrap()
657    ///     .and_utc();
658    /// let v1 = Variant::from(datetime);
659    /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
660    ///
661    /// // or from UTC-adjusted microsecond-precision variant
662    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
663    ///     .unwrap()
664    ///     .and_hms_milli_opt(12, 33, 54, 123)
665    ///     .unwrap()
666    ///     .and_utc();
667    /// // this will convert to `Variant::TimestampMicros`.
668    /// let v2 = Variant::from(datetime_micros);
669    /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
670    ///
671    /// // but not for other variants.
672    /// let v3 = Variant::from("hello!");
673    /// assert_eq!(v3.as_timestamp_nanos(), None);
674    /// ```
675    pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
676        match *self {
677            Variant::TimestampNanos(d) | Variant::TimestampMicros(d) => Some(d),
678            _ => None,
679        }
680    }
681
682    /// Converts this variant to a `NaiveDateTime` if possible.
683    ///
684    /// Returns `Some(NaiveDateTime)` for timestamp variants,
685    /// `None` for other variants.
686    ///
687    /// # Examples
688    ///
689    /// ```
690    /// use parquet_variant::Variant;
691    /// use chrono::NaiveDate;
692    ///
693    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
694    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
695    ///     .unwrap()
696    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
697    ///     .unwrap();
698    /// let v1 = Variant::from(datetime);
699    /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
700    ///
701    /// // or from a microsecond-precision non-UTC-adjusted variant
702    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
703    ///     .unwrap()
704    ///     .and_hms_milli_opt(12, 33, 54, 123)
705    ///     .unwrap();
706    /// // this will convert to `Variant::TimestampMicros`.
707    /// let v2 = Variant::from(datetime_micros);
708    /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
709    ///
710    /// // but not for other variants.
711    /// let v3 = Variant::from("hello!");
712    /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
713    /// ```
714    pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
715        match *self {
716            Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) => Some(d),
717            _ => None,
718        }
719    }
720
721    /// Converts this variant to a `&[u8]` if possible.
722    ///
723    /// Returns `Some(&[u8])` for binary variants,
724    /// `None` for non-binary variants.
725    ///
726    /// # Examples
727    ///
728    /// ```
729    /// use parquet_variant::Variant;
730    ///
731    /// // you can extract a byte slice from a binary variant
732    /// let data = b"hello!";
733    /// let v1 = Variant::Binary(data);
734    /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
735    ///
736    /// // but not from other variant types
737    /// let v2 = Variant::from(123i64);
738    /// assert_eq!(v2.as_u8_slice(), None);
739    /// ```
740    pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
741        if let Variant::Binary(d) = self {
742            Some(d)
743        } else {
744            None
745        }
746    }
747
748    /// Converts this variant to a `&str` if possible.
749    ///
750    /// Returns `Some(&str)` for string variants (both regular and short strings),
751    /// `None` for non-string variants.
752    ///
753    /// # Examples
754    ///
755    /// ```
756    /// use parquet_variant::Variant;
757    ///
758    /// // you can extract a string from string variants
759    /// let s = "hello!";
760    /// let v1 = Variant::from(s);
761    /// assert_eq!(v1.as_string(), Some(s));
762    ///
763    /// // but not from other variants
764    /// let v2 = Variant::from(123i64);
765    /// assert_eq!(v2.as_string(), None);
766    /// ```
767    pub fn as_string(&'v self) -> Option<&'v str> {
768        match self {
769            Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
770            _ => None,
771        }
772    }
773
774    /// Converts this variant to a `uuid hyphenated string` if possible.
775    ///
776    /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
777    ///
778    /// # Examples
779    ///
780    /// ```
781    /// use parquet_variant::Variant;
782    ///
783    /// // You can extract a UUID from a UUID variant
784    /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
785    /// let v1 = Variant::Uuid(s);
786    /// assert_eq!(s, v1.as_uuid().unwrap());
787    /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
788    ///
789    /// //but not from other variants
790    /// let v2 = Variant::from(1234);
791    /// assert_eq!(None, v2.as_uuid())
792    /// ```
793    pub fn as_uuid(&self) -> Option<Uuid> {
794        match self {
795            Variant::Uuid(u) => Some(*u),
796            _ => None,
797        }
798    }
799
800    /// Converts a boolean or numeric variant(integers, floating-point, and decimals with scale 0)
801    /// to the specified numeric type `T`.
802    ///
803    /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if
804    /// the conversion succeeds, `None` if the variant can't be casted to type `T`.
805    fn as_num<T>(&self) -> Option<T>
806    where
807        T: NumCast + Default,
808    {
809        match *self {
810            Variant::BooleanFalse => single_bool_to_numeric(false),
811            Variant::BooleanTrue => single_bool_to_numeric(true),
812            Variant::Int8(i) => num_cast(i),
813            Variant::Int16(i) => num_cast(i),
814            Variant::Int32(i) => num_cast(i),
815            Variant::Int64(i) => num_cast(i),
816            Variant::Float(f) => num_cast(f),
817            Variant::Double(d) => num_cast(d),
818            Variant::Decimal4(d) if d.scale() == 0 => num_cast(d.integer()),
819            Variant::Decimal8(d) if d.scale() == 0 => num_cast(d.integer()),
820            Variant::Decimal16(d) if d.scale() == 0 => num_cast(d.integer()),
821            _ => None,
822        }
823    }
824
825    /// Converts this variant to an `i8` if possible.
826    ///
827    /// Returns `Some(i8)` for boolean and numeric variants(integers, floating-point,
828    /// and decimals with scale 0) that fit in `i8` range,
829    /// `None` for other variants or values that would overflow.
830    ///
831    /// # Examples
832    ///
833    /// ```
834    /// use parquet_variant::Variant;
835    ///
836    /// // you can read an int64 variant into an i8 if it fits
837    /// let v1 = Variant::from(123i64);
838    /// assert_eq!(v1.as_int8(), Some(123i8));
839    ///
840    /// // or from boolean variant
841    /// let v2 = Variant::BooleanFalse;
842    /// assert_eq!(v2.as_int8(), Some(0));
843    ///
844    /// // but not if it would overflow
845    /// let v3 = Variant::from(1234i64);
846    /// assert_eq!(v3.as_int8(), None);
847    ///
848    /// // or if the variant cannot be cast into an integer
849    /// let v4 = Variant::from("hello!");
850    /// assert_eq!(v4.as_int8(), None);
851    /// ```
852    pub fn as_int8(&self) -> Option<i8> {
853        self.as_num()
854    }
855
856    /// Converts this variant to an `i16` if possible.
857    ///
858    /// Returns `Some(i16)` for boolean and numeric variants(integers, floating-point,
859    /// and decimals with scale 0) that fit in `i16` range
860    /// `None` for other variants or values that would overflow.
861    ///
862    /// # Examples
863    ///
864    /// ```
865    /// use parquet_variant::Variant;
866    ///
867    /// // you can read an int64 variant into an i16 if it fits
868    /// let v1 = Variant::from(123i64);
869    /// assert_eq!(v1.as_int16(), Some(123i16));
870    ///
871    /// // or from boolean variant
872    /// let v2 = Variant::BooleanFalse;
873    /// assert_eq!(v2.as_int16(), Some(0));
874    ///
875    /// // but not if it would overflow
876    /// let v3 = Variant::from(123456i64);
877    /// assert_eq!(v3.as_int16(), None);
878    ///
879    /// // or if the variant cannot be cast into an integer
880    /// let v4 = Variant::from("hello!");
881    /// assert_eq!(v4.as_int16(), None);
882    /// ```
883    pub fn as_int16(&self) -> Option<i16> {
884        self.as_num()
885    }
886
887    /// Converts this variant to an `i32` if possible.
888    ///
889    /// Returns `Some(i32)` for boolean and numeric variants(integers, floating-point,
890    /// and decimals with scale 0) that fit in `i32` range
891    /// `None` for other variants or values that would overflow.
892    ///
893    /// # Examples
894    ///
895    /// ```
896    /// use parquet_variant::Variant;
897    ///
898    /// // you can read an int64 variant into an i32 if it fits
899    /// let v1 = Variant::from(123i64);
900    /// assert_eq!(v1.as_int32(), Some(123i32));
901    ///
902    /// // or from boolean variant
903    /// let v2 = Variant::BooleanFalse;
904    /// assert_eq!(v2.as_int32(), Some(0));
905    ///
906    /// // but not if it would overflow
907    /// let v3 = Variant::from(12345678901i64);
908    /// assert_eq!(v3.as_int32(), None);
909    ///
910    /// // or if the variant cannot be cast into an integer
911    /// let v4 = Variant::from("hello!");
912    /// assert_eq!(v4.as_int32(), None);
913    /// ```
914    pub fn as_int32(&self) -> Option<i32> {
915        self.as_num()
916    }
917
918    /// Converts this variant to an `i64` if possible.
919    ///
920    /// Returns `Some(i64)` for boolean and numeric variants(integers, floating-point,
921    /// and decimals with scale 0) that fit in `i64` range
922    /// `None` for other variants or values that would overflow.
923    ///
924    /// # Examples
925    ///
926    /// ```
927    /// use parquet_variant::Variant;
928    ///
929    /// // you can read an int64 variant into an i64
930    /// let v1 = Variant::from(123i64);
931    /// assert_eq!(v1.as_int64(), Some(123i64));
932    ///
933    /// // or from boolean variant
934    /// let v2 = Variant::BooleanFalse;
935    /// assert_eq!(v2.as_int64(), Some(0));
936    ///
937    /// // but not a variant that cannot be cast into an integer
938    /// let v3 = Variant::from("hello!");
939    /// assert_eq!(v3.as_int64(), None);
940    /// ```
941    pub fn as_int64(&self) -> Option<i64> {
942        self.as_num()
943    }
944
945    /// Converts this variant to a `u8` if possible.
946    ///
947    /// Returns `Some(u8)` for boolean and numeric variants(integers, floating-point,
948    /// and decimals with scale 0) that fit in `u8` range
949    /// `None` for other variants or values that would overflow.
950    ///
951    /// # Examples
952    ///
953    /// ```
954    ///  use parquet_variant::{Variant, VariantDecimal4};
955    ///
956    ///  // you can read an int64 variant into an u8
957    ///  let v1 = Variant::from(123i64);
958    ///  assert_eq!(v1.as_u8(), Some(123u8));
959    ///
960    ///  // or a Decimal4 with scale 0 into u8
961    ///  let d = VariantDecimal4::try_new(26, 0).unwrap();
962    ///  let v2 = Variant::from(d);
963    ///  assert_eq!(v2.as_u8(), Some(26u8));
964    ///
965    /// // or from boolean variant
966    /// let v3 = Variant::BooleanFalse;
967    /// assert_eq!(v3.as_u8(), Some(0));
968    ///
969    ///  // but not a variant that can't fit into the range
970    ///  let v4 = Variant::from(-1);
971    ///  assert_eq!(v4.as_u8(), None);
972    ///
973    ///  // not a variant that decimal with scale not equal to zero
974    ///  let d = VariantDecimal4::try_new(1, 2).unwrap();
975    ///  let v5 = Variant::from(d);
976    ///  assert_eq!(v5.as_u8(), None);
977    ///
978    ///  // or not a variant that cannot be cast into an integer
979    ///  let v6 = Variant::from("hello!");
980    ///  assert_eq!(v6.as_u8(), None);
981    /// ```
982    pub fn as_u8(&self) -> Option<u8> {
983        self.as_num()
984    }
985
986    /// Converts this variant to an `u16` if possible.
987    ///
988    /// Returns `Some(u16)` for boolean and numeric variants(integers, floating-point,
989    /// and decimals with scale 0) that fit in `u16` range
990    /// `None` for other variants or values that would overflow.
991    ///
992    /// # Examples
993    ///
994    /// ```
995    ///  use parquet_variant::{Variant, VariantDecimal4};
996    ///
997    ///  // you can read an int64 variant into an u16
998    ///  let v1 = Variant::from(123i64);
999    ///  assert_eq!(v1.as_u16(), Some(123u16));
1000    ///
1001    ///  // or a Decimal4 with scale 0 into u8
1002    ///  let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
1003    ///  let v2 = Variant::from(d);
1004    ///  assert_eq!(v2.as_u16(), Some(u16::MAX));
1005    ///
1006    /// // or from boolean variant
1007    /// let v3= Variant::BooleanFalse;
1008    /// assert_eq!(v3.as_u16(), Some(0));
1009    ///
1010    ///  // but not a variant that can't fit into the range
1011    ///  let v4 = Variant::from(-1);
1012    ///  assert_eq!(v4.as_u16(), None);
1013    ///
1014    ///  // not a variant that decimal with scale not equal to zero
1015    ///  let d = VariantDecimal4::try_new(1, 2).unwrap();
1016    ///  let v5 = Variant::from(d);
1017    ///  assert_eq!(v5.as_u16(), None);
1018    ///
1019    ///  // or not a variant that cannot be cast into an integer
1020    ///  let v6 = Variant::from("hello!");
1021    ///  assert_eq!(v6.as_u16(), None);
1022    /// ```
1023    pub fn as_u16(&self) -> Option<u16> {
1024        self.as_num()
1025    }
1026
1027    /// Converts this variant to an `u32` if possible.
1028    ///
1029    /// Returns `Some(u32)` for boolean and numeric variants(integers, floating-point,
1030    /// and decimals with scale 0) that fit in `u32` range
1031    /// `None` for other variants or values that would overflow.
1032    ///
1033    /// # Examples
1034    ///
1035    /// ```
1036    ///  use parquet_variant::{Variant, VariantDecimal8};
1037    ///
1038    ///  // you can read an int64 variant into an u32
1039    ///  let v1 = Variant::from(123i64);
1040    ///  assert_eq!(v1.as_u32(), Some(123u32));
1041    ///
1042    ///  // or a Decimal4 with scale 0 into u8
1043    ///  let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
1044    ///  let v2 = Variant::from(d);
1045    ///  assert_eq!(v2.as_u32(), Some(u32::MAX));
1046    ///
1047    /// // or from boolean variant
1048    /// let v3 = Variant::BooleanFalse;
1049    /// assert_eq!(v3.as_u32(), Some(0));
1050    ///
1051    ///  // but not a variant that can't fit into the range
1052    ///  let v4 = Variant::from(-1);
1053    ///  assert_eq!(v4.as_u32(), None);
1054    ///
1055    ///  // not a variant that decimal with scale not equal to zero
1056    ///  let d = VariantDecimal8::try_new(1, 2).unwrap();
1057    ///  let v5 = Variant::from(d);
1058    ///  assert_eq!(v5.as_u32(), None);
1059    ///
1060    ///  // or not a variant that cannot be cast into an integer
1061    ///  let v6 = Variant::from("hello!");
1062    ///  assert_eq!(v6.as_u32(), None);
1063    /// ```
1064    pub fn as_u32(&self) -> Option<u32> {
1065        self.as_num()
1066    }
1067
1068    /// Converts this variant to an `u64` if possible.
1069    ///
1070    /// Returns `Some(u64)` for boolean and numeric variants(integers, floating-point,
1071    /// and decimals with scale 0) that fit in `u64` range
1072    /// `None` for other variants or values that would overflow.
1073    ///
1074    /// # Examples
1075    ///
1076    /// ```
1077    ///  use parquet_variant::{Variant, VariantDecimal16};
1078    ///
1079    ///  // you can read an int64 variant into an u64
1080    ///  let v1 = Variant::from(123i64);
1081    ///  assert_eq!(v1.as_u64(), Some(123u64));
1082    ///
1083    ///  // or a Decimal16 with scale 0 into u8
1084    ///  let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
1085    ///  let v2 = Variant::from(d);
1086    ///  assert_eq!(v2.as_u64(), Some(u64::MAX));
1087    ///
1088    /// // or from boolean variant
1089    /// let v3 = Variant::BooleanFalse;
1090    /// assert_eq!(v3.as_u64(), Some(0));
1091    ///
1092    ///  // but not a variant that can't fit into the range
1093    ///  let v4 = Variant::from(-1);
1094    ///  assert_eq!(v4.as_u64(), None);
1095    ///
1096    ///  // not a variant that decimal with scale not equal to zero
1097    /// let d = VariantDecimal16::try_new(1, 2).unwrap();
1098    ///  let v5 = Variant::from(d);
1099    ///  assert_eq!(v5.as_u64(), None);
1100    ///
1101    ///  // or not a variant that cannot be cast into an integer
1102    ///  let v6 = Variant::from("hello!");
1103    ///  assert_eq!(v6.as_u64(), None);
1104    /// ```
1105    pub fn as_u64(&self) -> Option<u64> {
1106        self.as_num()
1107    }
1108
1109    /// Converts this variant to tuple with a 4-byte unscaled value if possible.
1110    ///
1111    /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
1112    /// fits in `i32` range,
1113    /// `None` for non-decimal variants or decimal values that would overflow.
1114    ///
1115    /// # Examples
1116    ///
1117    /// ```
1118    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
1119    ///
1120    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1121    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1122    /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1123    ///
1124    /// // and from larger decimal variants if they fit
1125    /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
1126    /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1127    ///
1128    /// // but not if the value would overflow i32
1129    /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
1130    /// assert_eq!(v3.as_decimal4(), None);
1131    ///
1132    /// // or if the variant is not a decimal
1133    /// let v4 = Variant::from("hello!");
1134    /// assert_eq!(v4.as_decimal4(), None);
1135    /// ```
1136    pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
1137        match *self {
1138            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1139                self.as_num::<i32>().and_then(|x| x.try_into().ok())
1140            }
1141            Variant::Decimal4(decimal4) => Some(decimal4),
1142            Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
1143            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1144            _ => None,
1145        }
1146    }
1147
1148    /// Converts this variant to tuple with an 8-byte unscaled value if possible.
1149    ///
1150    /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
1151    /// fits in `i64` range,
1152    /// `None` for non-decimal variants or decimal values that would overflow.
1153    ///
1154    /// # Examples
1155    ///
1156    /// ```
1157    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
1158    ///
1159    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1160    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1161    /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1162    ///
1163    /// // and from larger decimal variants if they fit
1164    /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
1165    /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1166    ///
1167    /// // but not if the value would overflow i64
1168    /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
1169    /// assert_eq!(v3.as_decimal8(), None);
1170    ///
1171    /// // or if the variant is not a decimal
1172    /// let v4 = Variant::from("hello!");
1173    /// assert_eq!(v4.as_decimal8(), None);
1174    /// ```
1175    pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
1176        match *self {
1177            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1178                self.as_num::<i64>().and_then(|x| x.try_into().ok())
1179            }
1180            Variant::Decimal4(decimal4) => Some(decimal4.into()),
1181            Variant::Decimal8(decimal8) => Some(decimal8),
1182            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1183            _ => None,
1184        }
1185    }
1186
1187    /// Converts this variant to tuple with a 16-byte unscaled value if possible.
1188    ///
1189    /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
1190    /// fits in `i128` range,
1191    /// `None` for non-decimal variants or decimal values that would overflow.
1192    ///
1193    /// # Examples
1194    ///
1195    /// ```
1196    /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
1197    ///
1198    /// // you can extract decimal parts from smaller or equally-sized decimal variants
1199    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1200    /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
1201    ///
1202    /// // but not if the variant is not a decimal
1203    /// let v2 = Variant::from("hello!");
1204    /// assert_eq!(v2.as_decimal16(), None);
1205    /// ```
1206    pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
1207        match *self {
1208            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1209                self.as_num::<i128>().and_then(|x| x.try_into().ok())
1210            }
1211            Variant::Decimal4(decimal4) => Some(decimal4.into()),
1212            Variant::Decimal8(decimal8) => Some(decimal8.into()),
1213            Variant::Decimal16(decimal16) => Some(decimal16),
1214            _ => None,
1215        }
1216    }
1217
1218    /// Converts this variant to an `f16` if possible.
1219    ///
1220    /// Returns `Some(f16)` for boolean and numeric variants(integers, floating-point,
1221    /// and decimals with scale 0) that fit in `f16` range
1222    /// `None` otherwise.
1223    ///
1224    /// # Example
1225    ///
1226    /// ```
1227    /// use parquet_variant::Variant;
1228    /// use half::f16;
1229    ///
1230    /// // you can extract an f16 from a float variant
1231    /// let v1 = Variant::from(std::f32::consts::PI);
1232    /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
1233    ///
1234    /// // and from a double variant (with loss of precision to nearest f16)
1235    /// let v2 = Variant::from(std::f64::consts::PI);
1236    /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
1237    ///
1238    /// // and from boolean
1239    /// let v3 = Variant::BooleanTrue;
1240    /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0)));
1241    ///
1242    /// // return inf if overflow
1243    /// let v4 = Variant::from(123456);
1244    /// assert_eq!(v4.as_f16(), Some(f16::INFINITY));
1245    ///
1246    /// // but not from other variants
1247    /// let v5 = Variant::from("hello!");
1248    /// assert_eq!(v5.as_f16(), None);
1249    pub fn as_f16(&self) -> Option<f16> {
1250        self.as_num()
1251    }
1252
1253    /// Converts this variant to an `f32` if possible.
1254    ///
1255    /// Returns `Some(f32)` for boolean and numeric variants(integers, floating-point,
1256    /// and decimals with scale 0) that fit in `f32` range
1257    /// `None` otherwise.
1258    ///
1259    /// # Examples
1260    ///
1261    /// ```
1262    /// use parquet_variant::Variant;
1263    ///
1264    /// // you can extract an f32 from a float variant
1265    /// let v1 = Variant::from(std::f32::consts::PI);
1266    /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
1267    ///
1268    /// // and from a double variant (with loss of precision to nearest f32)
1269    /// let v2 = Variant::from(std::f64::consts::PI);
1270    /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
1271    ///
1272    /// // and from boolean variant
1273    /// let v3 = Variant::BooleanTrue;
1274    /// assert_eq!(v3.as_f32(), Some(1.0));
1275    ///
1276    /// // and return inf if overflow
1277    /// let v4 = Variant::from(f64::MAX);
1278    /// assert_eq!(v4.as_f32(), Some(f32::INFINITY));
1279    ///
1280    /// // but not from other variants
1281    /// let v5 = Variant::from("hello!");
1282    /// assert_eq!(v5.as_f32(), None);
1283    /// ```
1284    pub fn as_f32(&self) -> Option<f32> {
1285        self.as_num()
1286    }
1287
1288    /// Converts this variant to an `f64` if possible.
1289    ///
1290    /// Returns `Some(f64)` for boolean and numeric variants(integers, floating-point,
1291    /// and decimals with scale 0) that fit in `f64` range
1292    /// `None` for other variants or can't be represented by an f64.
1293    ///
1294    /// # Examples
1295    ///
1296    /// ```
1297    /// use parquet_variant::Variant;
1298    ///
1299    /// // you can extract an f64 from a float variant
1300    /// let v1 = Variant::from(std::f32::consts::PI);
1301    /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
1302    ///
1303    /// // and from a double variant
1304    /// let v2 = Variant::from(std::f64::consts::PI);
1305    /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
1306    ///
1307    /// // and from boolean variant
1308    /// let v3 = Variant::BooleanTrue;
1309    /// assert_eq!(v3.as_f64(), Some(1.0f64));
1310    ///
1311    /// // but not from other variants
1312    /// let v5 = Variant::from("hello!");
1313    /// assert_eq!(v5.as_f64(), None);
1314    /// ```
1315    pub fn as_f64(&self) -> Option<f64> {
1316        self.as_num()
1317    }
1318
1319    /// Converts this variant to an `Object` if it is an [`VariantObject`].
1320    ///
1321    /// Returns `Some(&VariantObject)` for object variants,
1322    /// `None` for non-object variants.
1323    ///
1324    /// See [`Self::get_path`] to dynamically traverse objects
1325    ///
1326    /// # Examples
1327    /// ```
1328    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1329    /// # let (metadata, value) = {
1330    /// # let mut builder = VariantBuilder::new();
1331    /// #   let mut obj = builder.new_object();
1332    /// #   obj.insert("name", "John");
1333    /// #   obj.finish();
1334    /// #   builder.finish()
1335    /// # };
1336    /// // object that is {"name": "John"}
1337    ///  let variant = Variant::new(&metadata, &value);
1338    /// // use the `as_object` method to access the object
1339    /// let obj = variant.as_object().expect("variant should be an object");
1340    /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1341    /// ```
1342    pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1343        if let Variant::Object(obj) = self {
1344            Some(obj)
1345        } else {
1346            None
1347        }
1348    }
1349
1350    /// If this is an object and the requested field name exists, retrieves the corresponding field
1351    /// value. Otherwise, returns None.
1352    ///
1353    /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1354    ///
1355    /// # Examples
1356    /// ```
1357    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1358    /// # let mut builder = VariantBuilder::new();
1359    /// # let mut obj = builder.new_object();
1360    /// # obj.insert("name", "John");
1361    /// # obj.finish();
1362    /// # let (metadata, value) = builder.finish();
1363    /// // object that is {"name": "John"}
1364    ///  let variant = Variant::new(&metadata, &value);
1365    /// // use the `get_object_field` method to access the object
1366    /// let obj = variant.get_object_field("name");
1367    /// assert_eq!(obj, Some(Variant::from("John")));
1368    /// let obj = variant.get_object_field("foo");
1369    /// assert!(obj.is_none());
1370    /// ```
1371    pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1372        match self {
1373            Variant::Object(object) => object.get(field_name),
1374            _ => None,
1375        }
1376    }
1377
1378    /// Converts this variant to a `List` if it is a [`VariantList`].
1379    ///
1380    /// Returns `Some(&VariantList)` for list variants,
1381    /// `None` for non-list variants.
1382    ///
1383    /// See [`Self::get_path`] to dynamically traverse lists
1384    ///
1385    /// # Examples
1386    /// ```
1387    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1388    /// # let (metadata, value) = {
1389    /// # let mut builder = VariantBuilder::new();
1390    /// #   let mut list = builder.new_list();
1391    /// #   list.append_value("John");
1392    /// #   list.append_value("Doe");
1393    /// #   list.finish();
1394    /// #   builder.finish()
1395    /// # };
1396    /// // list that is ["John", "Doe"]
1397    /// let variant = Variant::new(&metadata, &value);
1398    /// // use the `as_list` method to access the list
1399    /// let list = variant.as_list().expect("variant should be a list");
1400    /// assert_eq!(list.len(), 2);
1401    /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1402    /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1403    /// ```
1404    pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1405        if let Variant::List(list) = self {
1406            Some(list)
1407        } else {
1408            None
1409        }
1410    }
1411
1412    /// Converts this variant to a `NaiveTime` if possible.
1413    ///
1414    /// Returns `Some(NaiveTime)` for `Variant::Time`,
1415    /// `None` for non-Time variants.
1416    ///
1417    /// # Example
1418    ///
1419    /// ```
1420    /// use chrono::NaiveTime;
1421    /// use parquet_variant::Variant;
1422    ///
1423    /// // you can extract a `NaiveTime` from a `Variant::Time`
1424    /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1425    /// let v1 = Variant::from(time);
1426    /// assert_eq!(Some(time), v1.as_time_utc());
1427    ///
1428    /// // but not from other variants.
1429    /// let v2 = Variant::from("Hello");
1430    /// assert_eq!(None, v2.as_time_utc());
1431    /// ```
1432    pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1433        if let Variant::Time(time) = self {
1434            Some(*time)
1435        } else {
1436            None
1437        }
1438    }
1439
1440    /// If this is a list and the requested index is in bounds, retrieves the corresponding
1441    /// element. Otherwise, returns None.
1442    ///
1443    /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1444    ///
1445    /// # Examples
1446    /// ```
1447    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1448    /// # let mut builder = VariantBuilder::new();
1449    /// # let mut list = builder.new_list();
1450    /// # list.append_value("John");
1451    /// # list.append_value("Doe");
1452    /// # list.finish();
1453    /// # let (metadata, value) = builder.finish();
1454    /// // list that is ["John", "Doe"]
1455    /// let variant = Variant::new(&metadata, &value);
1456    /// // use the `get_list_element` method to access the list
1457    /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1458    /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1459    /// assert!(variant.get_list_element(2).is_none());
1460    /// ```
1461    pub fn get_list_element(&self, index: usize) -> Option<Self> {
1462        match self {
1463            Variant::List(list) => list.get(index),
1464            _ => None,
1465        }
1466    }
1467
1468    /// Return the metadata dictionary associated with this variant value.
1469    pub fn metadata(&self) -> &VariantMetadata<'m> {
1470        match self {
1471            Variant::Object(VariantObject { metadata, .. })
1472            | Variant::List(VariantList { metadata, .. }) => metadata,
1473            _ => &EMPTY_VARIANT_METADATA,
1474        }
1475    }
1476
1477    /// Return a new Variant with the path followed.
1478    ///
1479    /// If the path is not found, `None` is returned.
1480    ///
1481    /// # Example
1482    /// ```
1483    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1484    /// # let mut builder = VariantBuilder::new();
1485    /// # let mut obj = builder.new_object();
1486    /// # let mut list = obj.new_list("foo");
1487    /// # list.append_value("bar");
1488    /// # list.append_value("baz");
1489    /// # list.finish();
1490    /// # obj.finish();
1491    /// # let (metadata, value) = builder.finish();
1492    /// // given a variant like `{"foo": ["bar", "baz"]}`
1493    /// let variant = Variant::new(&metadata, &value);
1494    /// // Accessing a non existent path returns None
1495    /// assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), None);
1496    /// // Access obj["foo"]
1497    /// let path = VariantPath::try_from("foo").unwrap();
1498    /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1499    /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1500    /// // Access foo[0]
1501    /// let path = VariantPath::from(0);
1502    /// let bar = foo.get_path(&path).expect("element 0 should exist");
1503    /// // bar is a string
1504    /// assert_eq!(bar.as_string(), Some("bar"));
1505    /// // You can also access nested paths
1506    /// let path = VariantPath::try_from("foo").unwrap().join(0);
1507    /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1508    /// ```
1509    pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1510        path.iter()
1511            .try_fold(self.clone(), |output, element| match element {
1512                VariantPathElement::Field { name } => output.get_object_field(name),
1513                VariantPathElement::Index { index } => output.get_list_element(*index),
1514            })
1515    }
1516}
1517
1518impl From<()> for Variant<'_, '_> {
1519    fn from((): ()) -> Self {
1520        Variant::Null
1521    }
1522}
1523
1524impl From<bool> for Variant<'_, '_> {
1525    fn from(value: bool) -> Self {
1526        match value {
1527            true => Variant::BooleanTrue,
1528            false => Variant::BooleanFalse,
1529        }
1530    }
1531}
1532
1533impl From<i8> for Variant<'_, '_> {
1534    fn from(value: i8) -> Self {
1535        Variant::Int8(value)
1536    }
1537}
1538
1539impl From<i16> for Variant<'_, '_> {
1540    fn from(value: i16) -> Self {
1541        Variant::Int16(value)
1542    }
1543}
1544
1545impl From<i32> for Variant<'_, '_> {
1546    fn from(value: i32) -> Self {
1547        Variant::Int32(value)
1548    }
1549}
1550
1551impl From<i64> for Variant<'_, '_> {
1552    fn from(value: i64) -> Self {
1553        Variant::Int64(value)
1554    }
1555}
1556
1557impl From<u8> for Variant<'_, '_> {
1558    fn from(value: u8) -> Self {
1559        // if it fits in i8, use that, otherwise use i16
1560        if let Ok(value) = i8::try_from(value) {
1561            Variant::Int8(value)
1562        } else {
1563            Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible
1564        }
1565    }
1566}
1567
1568impl From<u16> for Variant<'_, '_> {
1569    fn from(value: u16) -> Self {
1570        // if it fits in i16, use that, otherwise use i32
1571        if let Ok(value) = i16::try_from(value) {
1572            Variant::Int16(value)
1573        } else {
1574            Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible
1575        }
1576    }
1577}
1578impl From<u32> for Variant<'_, '_> {
1579    fn from(value: u32) -> Self {
1580        // if it fits in i32, use that, otherwise use i64
1581        if let Ok(value) = i32::try_from(value) {
1582            Variant::Int32(value)
1583        } else {
1584            Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible
1585        }
1586    }
1587}
1588
1589impl From<u64> for Variant<'_, '_> {
1590    fn from(value: u64) -> Self {
1591        // if it fits in i64, use that, otherwise use Decimal16
1592        if let Ok(value) = i64::try_from(value) {
1593            Variant::Int64(value)
1594        } else {
1595            // u64 max is 18446744073709551615, which fits in i128
1596            Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap())
1597        }
1598    }
1599}
1600
1601impl From<VariantDecimal4> for Variant<'_, '_> {
1602    fn from(value: VariantDecimal4) -> Self {
1603        Variant::Decimal4(value)
1604    }
1605}
1606
1607impl From<VariantDecimal8> for Variant<'_, '_> {
1608    fn from(value: VariantDecimal8) -> Self {
1609        Variant::Decimal8(value)
1610    }
1611}
1612
1613impl From<VariantDecimal16> for Variant<'_, '_> {
1614    fn from(value: VariantDecimal16) -> Self {
1615        Variant::Decimal16(value)
1616    }
1617}
1618
1619impl From<half::f16> for Variant<'_, '_> {
1620    fn from(value: half::f16) -> Self {
1621        Variant::Float(value.into())
1622    }
1623}
1624
1625impl From<f32> for Variant<'_, '_> {
1626    fn from(value: f32) -> Self {
1627        Variant::Float(value)
1628    }
1629}
1630
1631impl From<f64> for Variant<'_, '_> {
1632    fn from(value: f64) -> Self {
1633        Variant::Double(value)
1634    }
1635}
1636
1637impl From<NaiveDate> for Variant<'_, '_> {
1638    fn from(value: NaiveDate) -> Self {
1639        Variant::Date(value)
1640    }
1641}
1642
1643impl From<DateTime<Utc>> for Variant<'_, '_> {
1644    fn from(value: DateTime<Utc>) -> Self {
1645        if value.nanosecond() % 1000 > 0 {
1646            Variant::TimestampNanos(value)
1647        } else {
1648            Variant::TimestampMicros(value)
1649        }
1650    }
1651}
1652
1653impl From<NaiveDateTime> for Variant<'_, '_> {
1654    fn from(value: NaiveDateTime) -> Self {
1655        if value.nanosecond() % 1000 > 0 {
1656            Variant::TimestampNtzNanos(value)
1657        } else {
1658            Variant::TimestampNtzMicros(value)
1659        }
1660    }
1661}
1662
1663impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1664    fn from(value: &'v [u8]) -> Self {
1665        Variant::Binary(value)
1666    }
1667}
1668
1669impl From<NaiveTime> for Variant<'_, '_> {
1670    fn from(value: NaiveTime) -> Self {
1671        Variant::Time(value)
1672    }
1673}
1674
1675impl From<Uuid> for Variant<'_, '_> {
1676    fn from(value: Uuid) -> Self {
1677        Variant::Uuid(value)
1678    }
1679}
1680
1681impl<'v> From<&'v str> for Variant<'_, 'v> {
1682    fn from(value: &'v str) -> Self {
1683        if value.len() > MAX_SHORT_STRING_BYTES {
1684            Variant::String(value)
1685        } else {
1686            Variant::ShortString(ShortString(value))
1687        }
1688    }
1689}
1690
1691impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1692    type Error = ArrowError;
1693
1694    fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1695        Ok(Variant::Decimal4(VariantDecimal4::try_new(
1696            value.0, value.1,
1697        )?))
1698    }
1699}
1700
1701impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1702    type Error = ArrowError;
1703
1704    fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1705        Ok(Variant::Decimal8(VariantDecimal8::try_new(
1706            value.0, value.1,
1707        )?))
1708    }
1709}
1710
1711impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1712    type Error = ArrowError;
1713
1714    fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1715        Ok(Variant::Decimal16(VariantDecimal16::try_new(
1716            value.0, value.1,
1717        )?))
1718    }
1719}
1720
// Debug-only placeholder for entries of a VariantObject or VariantList that
// fail to decode. Its `Debug` output is the bare token <invalid>, without the
// surrounding quotes that formatting a `&str` with `{:?}` would add.
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1729
// Debug-only wrapper that renders binary data as lowercase, two-digit hex
// bytes separated by single spaces (e.g. `01 02 de ad`). An empty slice
// renders as nothing.
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, byte) in self.0.iter().enumerate() {
            // The separator goes between bytes only, never before the first.
            if position > 0 {
                f.write_str(" ")?;
            }
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}
1744
1745impl std::fmt::Debug for Variant<'_, '_> {
1746    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1747        match self {
1748            Variant::Null => write!(f, "Null"),
1749            Variant::BooleanTrue => write!(f, "BooleanTrue"),
1750            Variant::BooleanFalse => write!(f, "BooleanFalse"),
1751            Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1752            Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1753            Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1754            Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1755            Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1756            Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1757            Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1758            Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1759            Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1760            Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1761            Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1762            Variant::TimestampNtzMicros(ts) => {
1763                f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1764            }
1765            Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1766            Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1767            Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1768            Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1769            Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1770            Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1771            Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1772            Variant::Object(obj) => {
1773                let mut map = f.debug_map();
1774                for res in obj.iter_try() {
1775                    match res {
1776                        Ok((k, v)) => map.entry(&k, &v),
1777                        Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1778                    };
1779                }
1780                map.finish()
1781            }
1782            Variant::List(arr) => {
1783                let mut list = f.debug_list();
1784                for res in arr.iter_try() {
1785                    match res {
1786                        Ok(v) => list.entry(&v),
1787                        Err(_) => list.entry(&InvalidVariant),
1788                    };
1789                }
1790                list.finish()
1791            }
1792        }
1793    }
1794}
1795
#[cfg(test)]
mod tests {

    use super::*;

    /// An empty value buffer is rejected even when the metadata is valid.
    #[test]
    fn test_empty_variant_will_fail() {
        let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();

        let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();

        let is_empty_bytes_error = matches!(
            &err,
            ArrowError::InvalidArgumentError(msg) if msg == "Received empty bytes"
        );
        assert!(is_empty_bytes_error);
    }

    /// `ShortString::try_new` accepts short input and rejects input one byte
    /// past the short-string limit.
    #[test]
    fn test_construct_short_string() {
        let accepted = ShortString::try_new("norm").expect("should fit in short string");
        assert_eq!(accepted.as_str(), "norm");

        let oversized = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
        assert!(ShortString::try_new(&oversized).is_err());
    }

    /// Each decimal width round-trips through `Variant::from` and its accessor.
    #[test]
    fn test_variant_decimal_conversion() {
        let d4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
        assert_eq!(Variant::from(d4).as_decimal4(), Some(d4));

        let d8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
        assert_eq!(Variant::from(d8).as_decimal8(), Some(d8));

        let d16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
        assert_eq!(Variant::from(d16).as_decimal16(), Some(d16));
    }

    /// Builds an object holding one value of every variant subtype and pins
    /// both the compact (`{:?}`) and the pretty (`{:#?}`) debug renderings.
    #[test]
    fn test_variant_all_subtypes_debug() {
        use crate::VariantBuilder;

        let mut builder = VariantBuilder::new();

        // Root object that receives one field per subtype.
        let mut root = builder.new_object();

        // Null, booleans and numbers.
        root.insert("null", ());
        root.insert("boolean_true", true);
        root.insert("boolean_false", false);
        root.insert("int8", 42i8);
        root.insert("int16", 1234i16);
        root.insert("int32", 123456i32);
        root.insert("int64", 1234567890123456789i64);
        root.insert("float", 1.234f32);
        root.insert("double", 1.23456789f64);

        // Date plus microsecond-precision timestamps (with and without zone).
        let micros_day = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
        root.insert("date", micros_day);

        let micros_ntz = micros_day.and_hms_milli_opt(15, 30, 45, 123).unwrap();
        root.insert(
            "timestamp_micros",
            Variant::TimestampMicros(micros_ntz.and_utc()),
        );
        root.insert(
            "timestamp_ntz_micros",
            Variant::TimestampNtzMicros(micros_ntz),
        );

        // Nanosecond-precision timestamps (with and without zone).
        let nanos_ntz = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap();
        root.insert("timestamp_nanos", Variant::TimestampNanos(nanos_ntz.and_utc()));
        root.insert("timestamp_ntz_nanos", Variant::TimestampNtzNanos(nanos_ntz));

        // Decimals of every width.
        root.insert("decimal4", VariantDecimal4::try_new(1234i32, 2).unwrap());
        root.insert("decimal8", VariantDecimal8::try_new(123456789i64, 3).unwrap());
        root.insert(
            "decimal16",
            VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap(),
        );

        // Binary, strings and time of day.
        let raw_bytes = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
        root.insert("binary", raw_bytes.as_slice());

        let beyond_short_limit =
            "This is a long string that exceeds the short string limit and contains emoji 🦀";
        root.insert("string", beyond_short_limit);
        root.insert("short_string", "Short string with emoji 🎉");
        root.insert("time", NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap());

        // UUID.
        let id = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        root.insert("uuid", Variant::Uuid(id));

        // Nested object.
        let mut inner_obj = root.new_object("nested_object");
        inner_obj.insert("inner_key1", "inner_value1");
        inner_obj.insert("inner_key2", 999i32);
        inner_obj.finish();

        // Heterogeneous list that itself contains a nested list.
        let mut outer_list = root.new_list("mixed_list");
        outer_list.append_value(1i32);
        outer_list.append_value("two");
        outer_list.append_value(true);
        outer_list.append_value(4.0f32);
        outer_list.append_value(());

        let mut inner_list = outer_list.new_list();
        inner_list.append_value("nested");
        inner_list.append_value(10i8);
        inner_list.finish();

        outer_list.finish();

        root.finish();

        let (metadata, value) = builder.finish();
        let variant = Variant::try_new(&metadata, &value).unwrap();

        // Compact Debug formatter (?)
        let debug_output = format!("{:?}", variant);

        // Every subtype must show up in the compact output.
        let required_fragments = [
            "\"null\": Null",
            "\"boolean_true\": BooleanTrue",
            "\"boolean_false\": BooleanFalse",
            "\"int8\": Int8(42)",
            "\"int16\": Int16(1234)",
            "\"int32\": Int32(123456)",
            "\"int64\": Int64(1234567890123456789)",
            "\"float\": Float(1.234)",
            "\"double\": Double(1.23456789",
            "\"date\": Date(2024-12-25)",
            "\"timestamp_micros\": TimestampMicros(",
            "\"timestamp_ntz_micros\": TimestampNtzMicros(",
            "\"timestamp_nanos\": TimestampNanos(",
            "\"timestamp_ntz_nanos\": TimestampNtzNanos(",
            "\"decimal4\": Decimal4(",
            "\"decimal8\": Decimal8(",
            "\"decimal16\": Decimal16(",
            "\"binary\": Binary(01 02 03 04 de ad be ef)",
            "\"string\": String(",
            "\"short_string\": ShortString(",
            "\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)",
            "\"time\": Time(01:02:03.000004)",
            "\"nested_object\":",
            "\"mixed_list\":",
        ];
        for fragment in required_fragments.iter() {
            assert!(
                debug_output.contains(*fragment),
                "debug output is missing {fragment}"
            );
        }

        let expected_compact = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
        assert_eq!(debug_output, expected_compact);

        // Alternate (pretty) Debug formatter (#?)
        let alt_debug_output = format!("{:#?}", variant);
        let expected_pretty = r#"{
    "binary": Binary(01 02 03 04 de ad be ef),
    "boolean_false": BooleanFalse,
    "boolean_true": BooleanTrue,
    "date": Date(
        2024-12-25,
    ),
    "decimal16": Decimal16(
        VariantDecimal16 {
            integer: 123456789012345678901234567890,
            scale: 4,
        },
    ),
    "decimal4": Decimal4(
        VariantDecimal4 {
            integer: 1234,
            scale: 2,
        },
    ),
    "decimal8": Decimal8(
        VariantDecimal8 {
            integer: 123456789,
            scale: 3,
        },
    ),
    "double": Double(
        1.23456789,
    ),
    "float": Float(
        1.234,
    ),
    "int16": Int16(
        1234,
    ),
    "int32": Int32(
        123456,
    ),
    "int64": Int64(
        1234567890123456789,
    ),
    "int8": Int8(
        42,
    ),
    "mixed_list": [
        Int32(
            1,
        ),
        ShortString(
            ShortString(
                "two",
            ),
        ),
        BooleanTrue,
        Float(
            4.0,
        ),
        Null,
        [
            ShortString(
                ShortString(
                    "nested",
                ),
            ),
            Int8(
                10,
            ),
        ],
    ],
    "nested_object": {
        "inner_key1": ShortString(
            ShortString(
                "inner_value1",
            ),
        ),
        "inner_key2": Int32(
            999,
        ),
    },
    "null": Null,
    "short_string": ShortString(
        ShortString(
            "Short string with emoji 🎉",
        ),
    ),
    "string": String(
        "This is a long string that exceeds the short string limit and contains emoji 🦀",
    ),
    "time": Time(
        01:02:03.000004,
    ),
    "timestamp_micros": TimestampMicros(
        2024-12-25T15:30:45.123Z,
    ),
    "timestamp_nanos": TimestampNanos(
        2025-08-15T12:03:04.123456789Z,
    ),
    "timestamp_ntz_micros": TimestampNtzMicros(
        2024-12-25T15:30:45.123,
    ),
    "timestamp_ntz_nanos": TimestampNtzNanos(
        2025-08-15T12:03:04.123456789,
    ),
    "uuid": Uuid(
        67e55044-10b1-426f-9247-bb680e5fe0c8,
    ),
}"#;
        assert_eq!(alt_debug_output, expected_pretty);
    }
}