parquet_variant/
variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
19pub use self::list::VariantList;
20pub use self::metadata::VariantMetadata;
21pub use self::object::VariantObject;
22use crate::decoder::{
23    self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
24};
25use crate::path::{VariantPath, VariantPathElement};
26use crate::utils::{first_byte_from_slice, slice_from_slice};
27use std::ops::Deref;
28
29use arrow_schema::ArrowError;
30use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
31use half::f16;
32use uuid::Uuid;
33
34mod decimal;
35mod list;
36mod metadata;
37mod object;
38
/// Maximum length, in bytes, of a Variant short string (`0x3F` = 63).
const MAX_SHORT_STRING_BYTES: usize = 0x3F;
40
/// A Variant [`ShortString`]
///
/// A zero-cost, `Copy` wrapper around a borrowed `&str`. Values built through
/// [`ShortString::try_new`] are guaranteed to be at most 63 bytes long, the
/// maximum length of a Variant short string.
///
/// The inner field is `pub(crate)`, so code inside this crate can construct a
/// `ShortString` directly without going through the length check.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ShortString<'a>(pub(crate) &'a str);
47
48impl<'a> ShortString<'a> {
49    /// Attempts to interpret `value` as a variant short string value.
50    ///
51    /// # Errors
52    ///
53    /// Returns an error if  `value` is longer than the maximum allowed length
54    /// of a Variant short string (63 bytes).
55    pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
56        if value.len() > MAX_SHORT_STRING_BYTES {
57            return Err(ArrowError::InvalidArgumentError(format!(
58                "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
59            )));
60        }
61
62        Ok(Self(value))
63    }
64
65    /// Returns the underlying Variant short string as a &str
66    pub fn as_str(&self) -> &'a str {
67        self.0
68    }
69}
70
71impl<'a> From<ShortString<'a>> for &'a str {
72    fn from(value: ShortString<'a>) -> Self {
73        value.0
74    }
75}
76
77impl<'a> TryFrom<&'a str> for ShortString<'a> {
78    type Error = ArrowError;
79
80    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
81        Self::try_new(value)
82    }
83}
84
85impl AsRef<str> for ShortString<'_> {
86    fn as_ref(&self) -> &str {
87        self.0
88    }
89}
90
91impl Deref for ShortString<'_> {
92    type Target = str;
93
94    fn deref(&self) -> &Self::Target {
95        self.0
96    }
97}
98
99/// Represents a [Parquet Variant]
100///
101/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
102///
103/// # Background
104///
105/// The [specification] says:
106///
107/// The Variant Binary Encoding allows representation of semi-structured data
108/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
109/// intended to allow efficient access to nested data even in the presence of
110/// very wide or deep structures.
111///
112/// Another motivation for the representation is that (aside from metadata) each
113/// nested Variant value is contiguous and self-contained. For example, in a
114/// Variant containing an Array of Variant values, the representation of an
115/// inner Variant value, when paired with the metadata of the full variant, is
116/// itself a valid Variant.
117///
118/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
119/// refers to extracting some elements of the variant into separate columns for
120/// more efficient extraction/filter pushdown. The [Variant Shredding
121/// specification] describes the details of shredding Variant values as typed
122/// Parquet columns.
123///
124/// A Variant represents a type that contains one of:
125///
126/// * Primitive: A type and corresponding value (e.g. INT, STRING)
127///
128/// * Array: An ordered list of Variant values
129///
130/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
131///   pairs). An object may not contain duplicate keys.
132///
133/// # Encoding
134///
135/// A Variant is encoded with 2 binary values, the value and the metadata. The
136/// metadata stores a header and an optional dictionary of field names which are
137/// referred to by offset in the value. The value is a binary representation of
138/// the actual data, and varies depending on the type.
139///
140/// # Design Goals
141///
142/// The design goals of the Rust API are as follows:
143/// 1. Speed / Zero copy access (no `clone`ing is required)
144/// 2. Safety
145/// 3. Follow standard Rust conventions
146///
147/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
148/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
149/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
150///
151/// # Examples:
152///
153/// ## Creating `Variant` from Rust Types
154/// ```
155/// use parquet_variant::Variant;
156/// // variants can be directly constructed
157/// let variant = Variant::Int32(123);
158/// // or constructed via `From` impls
159/// assert_eq!(variant, Variant::from(123i32));
160/// ```
161/// ## Creating `Variant` from metadata and value
162/// ```
163/// # use parquet_variant::{Variant, VariantMetadata};
164/// let metadata = [0x01, 0x00, 0x00];
165/// let value = [0x09, 0x48, 0x49];
166/// // parse the header metadata
167/// assert_eq!(
168///   Variant::from("HI"),
169///   Variant::new(&metadata, &value)
170/// );
171/// ```
172///
173/// ## Using `Variant` values
174/// ```
175/// # use parquet_variant::Variant;
176/// # let variant = Variant::Int32(123);
177/// // variants can be used in match statements like normal enums
178/// match variant {
179///   Variant::Int32(i) => println!("Integer: {}", i),
180///   Variant::String(s) => println!("String: {}", s),
181///   _ => println!("Other variant"),
182/// }
183/// ```
184///
185/// # Validation
186///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant value (see below).
189///
190/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
191/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
192/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
193/// `v` are the number of bytes in the metadata and value buffers, respectively.
194///
195/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
196/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
197/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
198/// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
199/// used to _validate_ an _unvalidated_ instance, if desired.
200///
201/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
202/// knows the underlying bytes were already validated previously, or if the caller intends to
203/// perform a small number of (fallible) accesses to a large variant value.
204///
205/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
206/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
207///
208/// # Safety
209///
210/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
211/// infallible methods may cause panics but will never lead to undefined behavior.
212///
213/// [metadata]: VariantMetadata#Validation
214/// [object]: VariantObject#Validation
215/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive type: Null
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: `Binary`, `String` and `ShortString` borrow from the *value* buffer only
    // (lifetime `'v`); they never reference the metadata buffer.
    /// Primitive (type_id=1): BINARY
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING
    ShortString(ShortString<'v>),
    // `Object` and `List` need both the metadata (`'m`) and the value (`'v`) buffers.
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
269
// We don't want this to grow because it could hurt performance of a frequently-created type.
// This is a `const` item, so it is evaluated at compile time; presumably the helper rejects the
// build when `size_of::<Variant>()` differs from 80 bytes -- confirm against
// `crate::utils::expect_size_of`.
const _: () = crate::utils::expect_size_of::<Variant>(80);
272
273impl<'m, 'v> Variant<'m, 'v> {
274    /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
275    ///
276    /// The instance is fully [validated].
277    ///
278    /// # Example
279    /// ```
280    /// use parquet_variant::{Variant, VariantMetadata};
281    /// let metadata = [0x01, 0x00, 0x00];
282    /// let value = [0x09, 0x48, 0x49];
283    /// // parse the header metadata
284    /// assert_eq!(
285    ///   Variant::from("HI"),
286    ///   Variant::try_new(&metadata, &value).unwrap()
287    /// );
288    /// ```
289    ///
290    /// [validated]: Self#Validation
291    pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
292        let metadata = VariantMetadata::try_new(metadata)?;
293        Self::try_new_with_metadata(metadata, value)
294    }
295
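    /// Interprets a metadata and value buffer pair as a new `Variant`.
    ///
    /// The instance is [unvalidated].
    ///
    /// # Panics
    ///
    /// Panics if the metadata or value bytes fail shallow validation. Use
    /// [`Self::try_new`] for a fallible, fully validated alternative.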
299    ///
300    /// # Example
301    /// ```
302    /// use parquet_variant::{Variant, VariantMetadata};
303    /// let metadata = [0x01, 0x00, 0x00];
304    /// let value = [0x09, 0x48, 0x49];
305    /// // parse the header metadata
306    /// assert_eq!(
307    ///   Variant::from("HI"),
308    ///   Variant::new(&metadata, &value)
309    /// );
310    /// ```
311    ///
312    /// [unvalidated]: Self#Validation
313    pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
314        let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
315            .expect("Invalid variant metadata");
316        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
317            .expect("Invalid variant data")
318    }
319
320    /// Create a new variant with existing metadata.
321    ///
322    /// The instance is fully [validated].
323    ///
324    /// # Example
325    /// ```
326    /// # use parquet_variant::{Variant, VariantMetadata};
327    /// let metadata = [0x01, 0x00, 0x00];
328    /// let value = [0x09, 0x48, 0x49];
329    /// // parse the header metadata first
330    /// let metadata = VariantMetadata::new(&metadata);
331    /// assert_eq!(
332    ///   Variant::from("HI"),
333    ///   Variant::try_new_with_metadata(metadata, &value).unwrap()
334    /// );
335    /// ```
336    ///
337    /// [validated]: Self#Validation
338    pub fn try_new_with_metadata(
339        metadata: VariantMetadata<'m>,
340        value: &'v [u8],
341    ) -> Result<Self, ArrowError> {
342        Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
343    }
344
345    /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
346    ///
347    /// [unvalidated]: Self#Validation
348    pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
349        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
350            .expect("Invalid variant")
351    }
352
353    // The actual constructor, which only performs shallow (constant-time) validation.
354    fn try_new_with_metadata_and_shallow_validation(
355        metadata: VariantMetadata<'m>,
356        value: &'v [u8],
357    ) -> Result<Self, ArrowError> {
358        let value_metadata = first_byte_from_slice(value)?;
359        let value_data = slice_from_slice(value, 1..)?;
360        let new_self = match get_basic_type(value_metadata) {
361            VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
362                VariantPrimitiveType::Null => Variant::Null,
363                VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
364                VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
365                VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
366                VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
367                VariantPrimitiveType::Decimal4 => {
368                    let (integer, scale) = decoder::decode_decimal4(value_data)?;
369                    Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
370                }
371                VariantPrimitiveType::Decimal8 => {
372                    let (integer, scale) = decoder::decode_decimal8(value_data)?;
373                    Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
374                }
375                VariantPrimitiveType::Decimal16 => {
376                    let (integer, scale) = decoder::decode_decimal16(value_data)?;
377                    Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
378                }
379                VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
380                VariantPrimitiveType::Double => {
381                    Variant::Double(decoder::decode_double(value_data)?)
382                }
383                VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
384                VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
385                VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
386                VariantPrimitiveType::TimestampMicros => {
387                    Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
388                }
389                VariantPrimitiveType::TimestampNtzMicros => {
390                    Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
391                }
392                VariantPrimitiveType::TimestampNanos => {
393                    Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
394                }
395                VariantPrimitiveType::TimestampNtzNanos => {
396                    Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
397                }
398                VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
399                VariantPrimitiveType::Binary => {
400                    Variant::Binary(decoder::decode_binary(value_data)?)
401                }
402                VariantPrimitiveType::String => {
403                    Variant::String(decoder::decode_long_string(value_data)?)
404                }
405                VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
406            },
407            VariantBasicType::ShortString => {
408                Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
409            }
410            VariantBasicType::Object => Variant::Object(
411                VariantObject::try_new_with_shallow_validation(metadata, value)?,
412            ),
413            VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
414                metadata, value,
415            )?),
416        };
417        Ok(new_self)
418    }
419
420    /// True if this variant instance has already been [validated].
421    ///
422    /// [validated]: Self#Validation
423    pub fn is_fully_validated(&self) -> bool {
424        match self {
425            Variant::List(list) => list.is_fully_validated(),
426            Variant::Object(obj) => obj.is_fully_validated(),
427            _ => true,
428        }
429    }
430
431    /// Recursively validates this variant value, ensuring that infallible access will not panic due
432    /// to invalid bytes.
433    ///
434    /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
435    /// constructed in unvalidated (and potentially invalid) state.
436    ///
437    /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
438    /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
439    ///
440    /// [objects]: VariantObject#Validation
441    /// [arrays]: VariantList#Validation
442    pub fn with_full_validation(self) -> Result<Self, ArrowError> {
443        use Variant::*;
444        match self {
445            List(list) => list.with_full_validation().map(List),
446            Object(obj) => obj.with_full_validation().map(Object),
447            _ => Ok(self),
448        }
449    }
450
451    /// Converts this variant to `()` if it is null.
452    ///
453    /// Returns `Some(())` for null variants,
454    /// `None` for non-null variants.
455    ///
456    /// # Examples
457    ///
458    /// ```
459    /// use parquet_variant::Variant;
460    ///
461    /// // you can extract `()` from a null variant
462    /// let v1 = Variant::from(());
463    /// assert_eq!(v1.as_null(), Some(()));
464    ///
465    /// // but not from other variants
466    /// let v2 = Variant::from("hello!");
467    /// assert_eq!(v2.as_null(), None);
468    /// ```
469    pub fn as_null(&self) -> Option<()> {
470        matches!(self, Variant::Null).then_some(())
471    }
472
473    /// Converts this variant to a `bool` if possible.
474    ///
475    /// Returns `Some(bool)` for boolean variants,
476    /// `None` for non-boolean variants.
477    ///
478    /// # Examples
479    ///
480    /// ```
481    /// use parquet_variant::Variant;
482    ///
483    /// // you can extract a bool from the true variant
484    /// let v1 = Variant::from(true);
485    /// assert_eq!(v1.as_boolean(), Some(true));
486    ///
487    /// // and the false variant
488    /// let v2 = Variant::from(false);
489    /// assert_eq!(v2.as_boolean(), Some(false));
490    ///
491    /// // but not from other variants
492    /// let v3 = Variant::from("hello!");
493    /// assert_eq!(v3.as_boolean(), None);
494    /// ```
495    pub fn as_boolean(&self) -> Option<bool> {
496        match self {
497            Variant::BooleanTrue => Some(true),
498            Variant::BooleanFalse => Some(false),
499            _ => None,
500        }
501    }
502
503    /// Converts this variant to a `NaiveDate` if possible.
504    ///
505    /// Returns `Some(NaiveDate)` for date variants,
506    /// `None` for non-date variants.
507    ///
508    /// # Examples
509    ///
510    /// ```
511    /// use parquet_variant::Variant;
512    /// use chrono::NaiveDate;
513    ///
514    /// // you can extract a NaiveDate from a date variant
515    /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
516    /// let v1 = Variant::from(date);
517    /// assert_eq!(v1.as_naive_date(), Some(date));
518    ///
519    /// // but not from other variants
520    /// let v2 = Variant::from("hello!");
521    /// assert_eq!(v2.as_naive_date(), None);
522    /// ```
523    pub fn as_naive_date(&self) -> Option<NaiveDate> {
524        if let Variant::Date(d) = self {
525            Some(*d)
526        } else {
527            None
528        }
529    }
530
531    /// Converts this variant to a `DateTime<Utc>` if possible.
532    ///
533    /// Returns `Some(DateTime<Utc>)` for timestamp variants,
534    /// `None` for non-timestamp variants.
535    ///
536    /// # Examples
537    ///
538    /// ```
539    /// use parquet_variant::Variant;
540    /// use chrono::NaiveDate;
541    ///
542    /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
543    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
544    /// let v1 = Variant::from(datetime);
545    /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
546    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc();
547    /// let v2 = Variant::from(datetime_nanos);
548    /// assert_eq!(v2.as_datetime_utc(), Some(datetime_nanos));
549    ///
550    /// // but not from other variants
551    /// let v3 = Variant::from("hello!");
552    /// assert_eq!(v3.as_datetime_utc(), None);
553    /// ```
554    pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
555        match *self {
556            Variant::TimestampMicros(d) | Variant::TimestampNanos(d) => Some(d),
557            _ => None,
558        }
559    }
560
561    /// Converts this variant to a `NaiveDateTime` if possible.
562    ///
563    /// Returns `Some(NaiveDateTime)` for timestamp variants,
564    /// `None` for non-timestamp variants.
565    ///
566    /// # Examples
567    ///
568    /// ```
569    /// use parquet_variant::Variant;
570    /// use chrono::NaiveDate;
571    ///
572    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
573    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
574    /// let v1 = Variant::from(datetime);
575    /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
576    ///
    /// // or from a non-UTC-adjusted value with nanosecond precision
578    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
579    /// let v2 = Variant::from(datetime);
580    /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
581    ///
582    /// // but not from other variants
583    /// let v3 = Variant::from("hello!");
584    /// assert_eq!(v3.as_naive_datetime(), None);
585    /// ```
586    pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
587        match *self {
588            Variant::TimestampNtzMicros(d) | Variant::TimestampNtzNanos(d) => Some(d),
589            _ => None,
590        }
591    }
592
593    /// Converts this variant to a `&[u8]` if possible.
594    ///
595    /// Returns `Some(&[u8])` for binary variants,
596    /// `None` for non-binary variants.
597    ///
598    /// # Examples
599    ///
600    /// ```
601    /// use parquet_variant::Variant;
602    ///
603    /// // you can extract a byte slice from a binary variant
604    /// let data = b"hello!";
605    /// let v1 = Variant::Binary(data);
606    /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
607    ///
608    /// // but not from other variant types
609    /// let v2 = Variant::from(123i64);
610    /// assert_eq!(v2.as_u8_slice(), None);
611    /// ```
612    pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
613        if let Variant::Binary(d) = self {
614            Some(d)
615        } else {
616            None
617        }
618    }
619
620    /// Converts this variant to a `&str` if possible.
621    ///
622    /// Returns `Some(&str)` for string variants (both regular and short strings),
623    /// `None` for non-string variants.
624    ///
625    /// # Examples
626    ///
627    /// ```
628    /// use parquet_variant::Variant;
629    ///
630    /// // you can extract a string from string variants
631    /// let s = "hello!";
632    /// let v1 = Variant::from(s);
633    /// assert_eq!(v1.as_string(), Some(s));
634    ///
635    /// // but not from other variants
636    /// let v2 = Variant::from(123i64);
637    /// assert_eq!(v2.as_string(), None);
638    /// ```
639    pub fn as_string(&'v self) -> Option<&'v str> {
640        match self {
641            Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
642            _ => None,
643        }
644    }
645
    /// Converts this variant to a [`Uuid`] if possible.
    ///
    /// Returns `Some(Uuid)` for UUID variants, `None` for non-UUID variants.
649    ///
650    /// # Examples
651    ///
652    /// ```
653    /// use parquet_variant::Variant;
654    ///
655    /// // You can extract a UUID from a UUID variant
656    /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
657    /// let v1 = Variant::Uuid(s);
658    /// assert_eq!(s, v1.as_uuid().unwrap());
659    /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
660    ///
661    /// //but not from other variants
662    /// let v2 = Variant::from(1234);
663    /// assert_eq!(None, v2.as_uuid())
664    /// ```
665    pub fn as_uuid(&self) -> Option<Uuid> {
666        match self {
667            Variant::Uuid(u) => Some(*u),
668            _ => None,
669        }
670    }
671
672    /// Converts this variant to an `i8` if possible.
673    ///
674    /// Returns `Some(i8)` for integer variants that fit in `i8` range,
675    /// `None` for non-integer variants or values that would overflow.
676    ///
677    /// # Examples
678    ///
679    /// ```
680    /// use parquet_variant::Variant;
681    ///
682    /// // you can read an int64 variant into an i8 if it fits
683    /// let v1 = Variant::from(123i64);
684    /// assert_eq!(v1.as_int8(), Some(123i8));
685    ///
686    /// // but not if it would overflow
687    /// let v2 = Variant::from(1234i64);
688    /// assert_eq!(v2.as_int8(), None);
689    ///
690    /// // or if the variant cannot be cast into an integer
691    /// let v3 = Variant::from("hello!");
692    /// assert_eq!(v3.as_int8(), None);
693    /// ```
694    pub fn as_int8(&self) -> Option<i8> {
695        match *self {
696            Variant::Int8(i) => Some(i),
697            Variant::Int16(i) => i.try_into().ok(),
698            Variant::Int32(i) => i.try_into().ok(),
699            Variant::Int64(i) => i.try_into().ok(),
700            Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
701            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
702            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
703            _ => None,
704        }
705    }
706
707    /// Converts this variant to an `i16` if possible.
708    ///
709    /// Returns `Some(i16)` for integer variants that fit in `i16` range,
710    /// `None` for non-integer variants or values that would overflow.
711    ///
712    /// # Examples
713    ///
714    /// ```
715    /// use parquet_variant::Variant;
716    ///
717    /// // you can read an int64 variant into an i16 if it fits
718    /// let v1 = Variant::from(123i64);
719    /// assert_eq!(v1.as_int16(), Some(123i16));
720    ///
721    /// // but not if it would overflow
722    /// let v2 = Variant::from(123456i64);
723    /// assert_eq!(v2.as_int16(), None);
724    ///
725    /// // or if the variant cannot be cast into an integer
726    /// let v3 = Variant::from("hello!");
727    /// assert_eq!(v3.as_int16(), None);
728    /// ```
729    pub fn as_int16(&self) -> Option<i16> {
730        match *self {
731            Variant::Int8(i) => Some(i.into()),
732            Variant::Int16(i) => Some(i),
733            Variant::Int32(i) => i.try_into().ok(),
734            Variant::Int64(i) => i.try_into().ok(),
735            Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
736            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
737            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
738            _ => None,
739        }
740    }
741
742    /// Converts this variant to an `i32` if possible.
743    ///
744    /// Returns `Some(i32)` for integer variants that fit in `i32` range,
745    /// `None` for non-integer variants or values that would overflow.
746    ///
747    /// # Examples
748    ///
749    /// ```
750    /// use parquet_variant::Variant;
751    ///
752    /// // you can read an int64 variant into an i32 if it fits
753    /// let v1 = Variant::from(123i64);
754    /// assert_eq!(v1.as_int32(), Some(123i32));
755    ///
756    /// // but not if it would overflow
757    /// let v2 = Variant::from(12345678901i64);
758    /// assert_eq!(v2.as_int32(), None);
759    ///
760    /// // or if the variant cannot be cast into an integer
761    /// let v3 = Variant::from("hello!");
762    /// assert_eq!(v3.as_int32(), None);
763    /// ```
764    pub fn as_int32(&self) -> Option<i32> {
765        match *self {
766            Variant::Int8(i) => Some(i.into()),
767            Variant::Int16(i) => Some(i.into()),
768            Variant::Int32(i) => Some(i),
769            Variant::Int64(i) => i.try_into().ok(),
770            Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer()),
771            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
772            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
773            _ => None,
774        }
775    }
776
777    /// Converts this variant to an `i64` if possible.
778    ///
779    /// Returns `Some(i64)` for integer variants that fit in `i64` range,
780    /// `None` for non-integer variants or values that would overflow.
781    ///
782    /// # Examples
783    ///
784    /// ```
785    /// use parquet_variant::Variant;
786    ///
787    /// // you can read an int64 variant into an i64
788    /// let v1 = Variant::from(123i64);
789    /// assert_eq!(v1.as_int64(), Some(123i64));
790    ///
791    /// // but not a variant that cannot be cast into an integer
792    /// let v2 = Variant::from("hello!");
793    /// assert_eq!(v2.as_int64(), None);
794    /// ```
795    pub fn as_int64(&self) -> Option<i64> {
796        match *self {
797            Variant::Int8(i) => Some(i.into()),
798            Variant::Int16(i) => Some(i.into()),
799            Variant::Int32(i) => Some(i.into()),
800            Variant::Int64(i) => Some(i),
801            Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer().into()),
802            Variant::Decimal8(d) if d.scale() == 0 => Some(d.integer()),
803            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
804            _ => None,
805        }
806    }
807
    /// Converts this variant to a [`VariantDecimal4`] (4-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal4)` for integer and decimal variants whose
    /// value can be converted into a `VariantDecimal4`,
    /// `None` for all other variants or for values that would overflow.
813    ///
814    /// # Examples
815    ///
816    /// ```
817    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
818    ///
819    /// // you can extract decimal parts from smaller or equally-sized decimal variants
820    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
821    /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
822    ///
823    /// // and from larger decimal variants if they fit
824    /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
825    /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
826    ///
827    /// // but not if the value would overflow i32
828    /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
829    /// assert_eq!(v3.as_decimal4(), None);
830    ///
831    /// // or if the variant is not a decimal
832    /// let v4 = Variant::from("hello!");
833    /// assert_eq!(v4.as_decimal4(), None);
834    /// ```
835    pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
836        match *self {
837            Variant::Int8(i) => i32::from(i).try_into().ok(),
838            Variant::Int16(i) => i32::from(i).try_into().ok(),
839            Variant::Int32(i) => i.try_into().ok(),
840            Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(),
841            Variant::Decimal4(decimal4) => Some(decimal4),
842            Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
843            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
844            _ => None,
845        }
846    }
847
    /// Converts this variant to a [`VariantDecimal8`] (8-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal8)` for integer and decimal variants whose
    /// value can be converted into a `VariantDecimal8`,
    /// `None` for all other variants or for values that would overflow.
853    ///
854    /// # Examples
855    ///
856    /// ```
857    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
858    ///
859    /// // you can extract decimal parts from smaller or equally-sized decimal variants
860    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
861    /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
862    ///
863    /// // and from larger decimal variants if they fit
864    /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
865    /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
866    ///
867    /// // but not if the value would overflow i64
868    /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
869    /// assert_eq!(v3.as_decimal8(), None);
870    ///
871    /// // or if the variant is not a decimal
872    /// let v4 = Variant::from("hello!");
873    /// assert_eq!(v4.as_decimal8(), None);
874    /// ```
875    pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
876        match *self {
877            Variant::Int8(i) => i64::from(i).try_into().ok(),
878            Variant::Int16(i) => i64::from(i).try_into().ok(),
879            Variant::Int32(i) => i64::from(i).try_into().ok(),
880            Variant::Int64(i) => i.try_into().ok(),
881            Variant::Decimal4(decimal4) => Some(decimal4.into()),
882            Variant::Decimal8(decimal8) => Some(decimal8),
883            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
884            _ => None,
885        }
886    }
887
    /// Converts this variant to a [`VariantDecimal16`] (16-byte unscaled value) if possible.
    ///
    /// Returns `Some(VariantDecimal16)` for integer and decimal variants whose
    /// value can be converted into a `VariantDecimal16`,
    /// `None` for all other variants or if the conversion fails.
893    ///
894    /// # Examples
895    ///
896    /// ```
897    /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
898    ///
899    /// // you can extract decimal parts from smaller or equally-sized decimal variants
900    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
901    /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
902    ///
903    /// // but not if the variant is not a decimal
904    /// let v2 = Variant::from("hello!");
905    /// assert_eq!(v2.as_decimal16(), None);
906    /// ```
907    pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
908        match *self {
909            Variant::Int8(i) => i128::from(i).try_into().ok(),
910            Variant::Int16(i) => i128::from(i).try_into().ok(),
911            Variant::Int32(i) => i128::from(i).try_into().ok(),
912            Variant::Int64(i) => i128::from(i).try_into().ok(),
913            Variant::Decimal4(decimal4) => Some(decimal4.into()),
914            Variant::Decimal8(decimal8) => Some(decimal8.into()),
915            Variant::Decimal16(decimal16) => Some(decimal16),
916            _ => None,
917        }
918    }
919
920    /// Converts this variant to an `f16` if possible.
921    ///
922    /// Returns `Some(f16)` for float and double variants,
923    /// `None` for non-floating-point variants.
924    ///
925    /// # Example
926    ///
927    /// ```
928    /// use parquet_variant::Variant;
929    /// use half::f16;
930    ///
931    /// // you can extract an f16 from a float variant
932    /// let v1 = Variant::from(std::f32::consts::PI);
933    /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
934    ///
935    /// // and from a double variant (with loss of precision to nearest f16)
936    /// let v2 = Variant::from(std::f64::consts::PI);
937    /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
938    ///
939    /// // but not from other variants
940    /// let v3 = Variant::from("hello!");
    /// assert_eq!(v3.as_f16(), None);
    /// ```
942    pub fn as_f16(&self) -> Option<f16> {
943        match *self {
944            Variant::Float(i) => Some(f16::from_f32(i)),
945            Variant::Double(i) => Some(f16::from_f64(i)),
946            _ => None,
947        }
948    }
949
950    /// Converts this variant to an `f32` if possible.
951    ///
952    /// Returns `Some(f32)` for float and double variants,
953    /// `None` for non-floating-point variants.
954    ///
955    /// # Examples
956    ///
957    /// ```
958    /// use parquet_variant::Variant;
959    ///
960    /// // you can extract an f32 from a float variant
961    /// let v1 = Variant::from(std::f32::consts::PI);
962    /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
963    ///
964    /// // and from a double variant (with loss of precision to nearest f32)
965    /// let v2 = Variant::from(std::f64::consts::PI);
966    /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
967    ///
968    /// // but not from other variants
969    /// let v3 = Variant::from("hello!");
970    /// assert_eq!(v3.as_f32(), None);
971    /// ```
972    #[allow(clippy::cast_possible_truncation)]
973    pub fn as_f32(&self) -> Option<f32> {
974        match *self {
975            Variant::Float(i) => Some(i),
976            Variant::Double(i) => Some(i as f32),
977            _ => None,
978        }
979    }
980
981    /// Converts this variant to an `f64` if possible.
982    ///
983    /// Returns `Some(f64)` for float and double variants,
984    /// `None` for non-floating-point variants.
985    ///
986    /// # Examples
987    ///
988    /// ```
989    /// use parquet_variant::Variant;
990    ///
991    /// // you can extract an f64 from a float variant
992    /// let v1 = Variant::from(std::f32::consts::PI);
993    /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
994    ///
995    /// // and from a double variant
996    /// let v2 = Variant::from(std::f64::consts::PI);
997    /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
998    ///
999    /// // but not from other variants
1000    /// let v3 = Variant::from("hello!");
1001    /// assert_eq!(v3.as_f64(), None);
1002    /// ```
1003    pub fn as_f64(&self) -> Option<f64> {
1004        match *self {
1005            Variant::Float(i) => Some(i.into()),
1006            Variant::Double(i) => Some(i),
1007            _ => None,
1008        }
1009    }
1010
1011    /// Converts this variant to an `Object` if it is an [`VariantObject`].
1012    ///
1013    /// Returns `Some(&VariantObject)` for object variants,
1014    /// `None` for non-object variants.
1015    ///
1016    /// See [`Self::get_path`] to dynamically traverse objects
1017    ///
1018    /// # Examples
1019    /// ```
1020    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1021    /// # let (metadata, value) = {
1022    /// # let mut builder = VariantBuilder::new();
1023    /// #   let mut obj = builder.new_object();
1024    /// #   obj.insert("name", "John");
1025    /// #   obj.finish();
1026    /// #   builder.finish()
1027    /// # };
1028    /// // object that is {"name": "John"}
1029    ///  let variant = Variant::new(&metadata, &value);
1030    /// // use the `as_object` method to access the object
1031    /// let obj = variant.as_object().expect("variant should be an object");
1032    /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1033    /// ```
1034    pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1035        if let Variant::Object(obj) = self {
1036            Some(obj)
1037        } else {
1038            None
1039        }
1040    }
1041
1042    /// If this is an object and the requested field name exists, retrieves the corresponding field
1043    /// value. Otherwise, returns None.
1044    ///
1045    /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1046    ///
1047    /// # Examples
1048    /// ```
1049    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1050    /// # let mut builder = VariantBuilder::new();
1051    /// # let mut obj = builder.new_object();
1052    /// # obj.insert("name", "John");
1053    /// # obj.finish();
1054    /// # let (metadata, value) = builder.finish();
1055    /// // object that is {"name": "John"}
1056    ///  let variant = Variant::new(&metadata, &value);
1057    /// // use the `get_object_field` method to access the object
1058    /// let obj = variant.get_object_field("name");
1059    /// assert_eq!(obj, Some(Variant::from("John")));
1060    /// let obj = variant.get_object_field("foo");
1061    /// assert!(obj.is_none());
1062    /// ```
1063    pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1064        match self {
1065            Variant::Object(object) => object.get(field_name),
1066            _ => None,
1067        }
1068    }
1069
1070    /// Converts this variant to a `List` if it is a [`VariantList`].
1071    ///
1072    /// Returns `Some(&VariantList)` for list variants,
1073    /// `None` for non-list variants.
1074    ///
1075    /// See [`Self::get_path`] to dynamically traverse lists
1076    ///
1077    /// # Examples
1078    /// ```
1079    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1080    /// # let (metadata, value) = {
1081    /// # let mut builder = VariantBuilder::new();
1082    /// #   let mut list = builder.new_list();
1083    /// #   list.append_value("John");
1084    /// #   list.append_value("Doe");
1085    /// #   list.finish();
1086    /// #   builder.finish()
1087    /// # };
1088    /// // list that is ["John", "Doe"]
1089    /// let variant = Variant::new(&metadata, &value);
1090    /// // use the `as_list` method to access the list
1091    /// let list = variant.as_list().expect("variant should be a list");
1092    /// assert_eq!(list.len(), 2);
1093    /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1094    /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1095    /// ```
1096    pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1097        if let Variant::List(list) = self {
1098            Some(list)
1099        } else {
1100            None
1101        }
1102    }
1103
1104    /// Converts this variant to a `NaiveTime` if possible.
1105    ///
1106    /// Returns `Some(NaiveTime)` for `Variant::Time`,
1107    /// `None` for non-Time variants.
1108    ///
1109    /// # Example
1110    ///
1111    /// ```
1112    /// use chrono::NaiveTime;
1113    /// use parquet_variant::Variant;
1114    ///
1115    /// // you can extract a `NaiveTime` from a `Variant::Time`
1116    /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1117    /// let v1 = Variant::from(time);
1118    /// assert_eq!(Some(time), v1.as_time_utc());
1119    ///
1120    /// // but not from other variants.
1121    /// let v2 = Variant::from("Hello");
1122    /// assert_eq!(None, v2.as_time_utc());
1123    /// ```
1124    pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1125        if let Variant::Time(time) = self {
1126            Some(*time)
1127        } else {
1128            None
1129        }
1130    }
1131
1132    /// If this is a list and the requested index is in bounds, retrieves the corresponding
1133    /// element. Otherwise, returns None.
1134    ///
1135    /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1136    ///
1137    /// # Examples
1138    /// ```
1139    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1140    /// # let mut builder = VariantBuilder::new();
1141    /// # let mut list = builder.new_list();
1142    /// # list.append_value("John");
1143    /// # list.append_value("Doe");
1144    /// # list.finish();
1145    /// # let (metadata, value) = builder.finish();
1146    /// // list that is ["John", "Doe"]
1147    /// let variant = Variant::new(&metadata, &value);
1148    /// // use the `get_list_element` method to access the list
1149    /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1150    /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1151    /// assert!(variant.get_list_element(2).is_none());
1152    /// ```
1153    pub fn get_list_element(&self, index: usize) -> Option<Self> {
1154        match self {
1155            Variant::List(list) => list.get(index),
1156            _ => None,
1157        }
1158    }
1159
    /// Return the metadata associated with this variant, if any.
    ///
    /// Returns `Some(&VariantMetadata)` for object and list variants,
    /// `None` for all other variants.
1163    pub fn metadata(&self) -> Option<&'m VariantMetadata<'_>> {
1164        match self {
1165            Variant::Object(VariantObject { metadata, .. })
1166            | Variant::List(VariantList { metadata, .. }) => Some(metadata),
1167            _ => None,
1168        }
1169    }
1170
1171    /// Return a new Variant with the path followed.
1172    ///
1173    /// If the path is not found, `None` is returned.
1174    ///
1175    /// # Example
1176    /// ```
1177    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1178    /// # let mut builder = VariantBuilder::new();
1179    /// # let mut obj = builder.new_object();
1180    /// # let mut list = obj.new_list("foo");
1181    /// # list.append_value("bar");
1182    /// # list.append_value("baz");
1183    /// # list.finish();
1184    /// # obj.finish();
1185    /// # let (metadata, value) = builder.finish();
1186    /// // given a variant like `{"foo": ["bar", "baz"]}`
1187    /// let variant = Variant::new(&metadata, &value);
1188    /// // Accessing a non existent path returns None
1189    /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
1190    /// // Access obj["foo"]
1191    /// let path = VariantPath::from("foo");
1192    /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1193    /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1194    /// // Access foo[0]
1195    /// let path = VariantPath::from(0);
1196    /// let bar = foo.get_path(&path).expect("element 0 should exist");
1197    /// // bar is a string
1198    /// assert_eq!(bar.as_string(), Some("bar"));
1199    /// // You can also access nested paths
1200    /// let path = VariantPath::from("foo").join(0);
1201    /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1202    /// ```
1203    pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1204        path.iter()
1205            .try_fold(self.clone(), |output, element| match element {
1206                VariantPathElement::Field { name } => output.get_object_field(name),
1207                VariantPathElement::Index { index } => output.get_list_element(*index),
1208            })
1209    }
1210}
1211
1212impl From<()> for Variant<'_, '_> {
1213    fn from((): ()) -> Self {
1214        Variant::Null
1215    }
1216}
1217
1218impl From<bool> for Variant<'_, '_> {
1219    fn from(value: bool) -> Self {
1220        match value {
1221            true => Variant::BooleanTrue,
1222            false => Variant::BooleanFalse,
1223        }
1224    }
1225}
1226
1227impl From<i8> for Variant<'_, '_> {
1228    fn from(value: i8) -> Self {
1229        Variant::Int8(value)
1230    }
1231}
1232
1233impl From<i16> for Variant<'_, '_> {
1234    fn from(value: i16) -> Self {
1235        Variant::Int16(value)
1236    }
1237}
1238
1239impl From<i32> for Variant<'_, '_> {
1240    fn from(value: i32) -> Self {
1241        Variant::Int32(value)
1242    }
1243}
1244
1245impl From<i64> for Variant<'_, '_> {
1246    fn from(value: i64) -> Self {
1247        Variant::Int64(value)
1248    }
1249}
1250
1251impl From<u8> for Variant<'_, '_> {
1252    fn from(value: u8) -> Self {
1253        // if it fits in i8, use that, otherwise use i16
1254        if let Ok(value) = i8::try_from(value) {
1255            Variant::Int8(value)
1256        } else {
1257            Variant::Int16(i16::from(value))
1258        }
1259    }
1260}
1261
1262impl From<u16> for Variant<'_, '_> {
1263    fn from(value: u16) -> Self {
1264        // if it fits in i16, use that, otherwise use i32
1265        if let Ok(value) = i16::try_from(value) {
1266            Variant::Int16(value)
1267        } else {
1268            Variant::Int32(i32::from(value))
1269        }
1270    }
1271}
1272impl From<u32> for Variant<'_, '_> {
1273    fn from(value: u32) -> Self {
1274        // if it fits in i32, use that, otherwise use i64
1275        if let Ok(value) = i32::try_from(value) {
1276            Variant::Int32(value)
1277        } else {
1278            Variant::Int64(i64::from(value))
1279        }
1280    }
1281}
1282
1283impl From<u64> for Variant<'_, '_> {
1284    fn from(value: u64) -> Self {
1285        // if it fits in i64, use that, otherwise use Decimal16
1286        if let Ok(value) = i64::try_from(value) {
1287            Variant::Int64(value)
1288        } else {
1289            // u64 max is 18446744073709551615, which fits in i128
1290            Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap())
1291        }
1292    }
1293}
1294
1295impl From<VariantDecimal4> for Variant<'_, '_> {
1296    fn from(value: VariantDecimal4) -> Self {
1297        Variant::Decimal4(value)
1298    }
1299}
1300
1301impl From<VariantDecimal8> for Variant<'_, '_> {
1302    fn from(value: VariantDecimal8) -> Self {
1303        Variant::Decimal8(value)
1304    }
1305}
1306
1307impl From<VariantDecimal16> for Variant<'_, '_> {
1308    fn from(value: VariantDecimal16) -> Self {
1309        Variant::Decimal16(value)
1310    }
1311}
1312
1313impl From<half::f16> for Variant<'_, '_> {
1314    fn from(value: half::f16) -> Self {
1315        Variant::Float(value.into())
1316    }
1317}
1318
1319impl From<f32> for Variant<'_, '_> {
1320    fn from(value: f32) -> Self {
1321        Variant::Float(value)
1322    }
1323}
1324
1325impl From<f64> for Variant<'_, '_> {
1326    fn from(value: f64) -> Self {
1327        Variant::Double(value)
1328    }
1329}
1330
1331impl From<NaiveDate> for Variant<'_, '_> {
1332    fn from(value: NaiveDate) -> Self {
1333        Variant::Date(value)
1334    }
1335}
1336
1337impl From<DateTime<Utc>> for Variant<'_, '_> {
1338    fn from(value: DateTime<Utc>) -> Self {
1339        if value.nanosecond() % 1000 > 0 {
1340            Variant::TimestampNanos(value)
1341        } else {
1342            Variant::TimestampMicros(value)
1343        }
1344    }
1345}
1346
1347impl From<NaiveDateTime> for Variant<'_, '_> {
1348    fn from(value: NaiveDateTime) -> Self {
1349        if value.nanosecond() % 1000 > 0 {
1350            Variant::TimestampNtzNanos(value)
1351        } else {
1352            Variant::TimestampNtzMicros(value)
1353        }
1354    }
1355}
1356
1357impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1358    fn from(value: &'v [u8]) -> Self {
1359        Variant::Binary(value)
1360    }
1361}
1362
1363impl From<NaiveTime> for Variant<'_, '_> {
1364    fn from(value: NaiveTime) -> Self {
1365        Variant::Time(value)
1366    }
1367}
1368
1369impl From<Uuid> for Variant<'_, '_> {
1370    fn from(value: Uuid) -> Self {
1371        Variant::Uuid(value)
1372    }
1373}
1374
1375impl<'v> From<&'v str> for Variant<'_, 'v> {
1376    fn from(value: &'v str) -> Self {
1377        if value.len() > MAX_SHORT_STRING_BYTES {
1378            Variant::String(value)
1379        } else {
1380            Variant::ShortString(ShortString(value))
1381        }
1382    }
1383}
1384
1385impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1386    type Error = ArrowError;
1387
1388    fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1389        Ok(Variant::Decimal4(VariantDecimal4::try_new(
1390            value.0, value.1,
1391        )?))
1392    }
1393}
1394
1395impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1396    type Error = ArrowError;
1397
1398    fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1399        Ok(Variant::Decimal8(VariantDecimal8::try_new(
1400            value.0, value.1,
1401        )?))
1402    }
1403}
1404
1405impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1406    type Error = ArrowError;
1407
1408    fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1409        Ok(Variant::Decimal16(VariantDecimal16::try_new(
1410            value.0, value.1,
1411        )?))
1412    }
1413}
1414
// Debug placeholder for entries of a VariantObject or VariantList that fail to decode.
// Its Debug output is the bare text <invalid>, without the quotes a `&str` would get.
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1423
// Debug wrapper that renders a byte slice as lowercase, two-digit hex values
// separated by single spaces (nothing at all for an empty slice).
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, byte) in self.0.iter().enumerate() {
            // Separator goes between bytes only, never before the first one.
            if position > 0 {
                f.write_str(" ")?;
            }
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}
1438
1439impl std::fmt::Debug for Variant<'_, '_> {
1440    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1441        match self {
1442            Variant::Null => write!(f, "Null"),
1443            Variant::BooleanTrue => write!(f, "BooleanTrue"),
1444            Variant::BooleanFalse => write!(f, "BooleanFalse"),
1445            Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1446            Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1447            Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1448            Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1449            Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1450            Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1451            Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1452            Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1453            Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1454            Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1455            Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1456            Variant::TimestampNtzMicros(ts) => {
1457                f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1458            }
1459            Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1460            Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1461            Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1462            Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1463            Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1464            Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1465            Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1466            Variant::Object(obj) => {
1467                let mut map = f.debug_map();
1468                for res in obj.iter_try() {
1469                    match res {
1470                        Ok((k, v)) => map.entry(&k, &v),
1471                        Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1472                    };
1473                }
1474                map.finish()
1475            }
1476            Variant::List(arr) => {
1477                let mut list = f.debug_list();
1478                for res in arr.iter_try() {
1479                    match res {
1480                        Ok(v) => list.entry(&v),
1481                        Err(_) => list.entry(&InvalidVariant),
1482                    };
1483                }
1484                list.finish()
1485            }
1486        }
1487    }
1488}
1489
1490#[cfg(test)]
1491mod tests {
1492
1493    use super::*;
1494
1495    #[test]
1496    fn test_empty_variant_will_fail() {
1497        let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();
1498
1499        let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();
1500
1501        assert!(matches!(
1502            err,
1503            ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
1504    }
1505
1506    #[test]
1507    fn test_construct_short_string() {
1508        let short_string = ShortString::try_new("norm").expect("should fit in short string");
1509        assert_eq!(short_string.as_str(), "norm");
1510
1511        let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
1512        let res = ShortString::try_new(&long_string);
1513        assert!(res.is_err());
1514    }
1515
1516    #[test]
1517    fn test_variant_decimal_conversion() {
1518        let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
1519        let variant = Variant::from(decimal4);
1520        assert_eq!(variant.as_decimal4(), Some(decimal4));
1521
1522        let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
1523        let variant = Variant::from(decimal8);
1524        assert_eq!(variant.as_decimal8(), Some(decimal8));
1525
1526        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
1527        let variant = Variant::from(decimal16);
1528        assert_eq!(variant.as_decimal16(), Some(decimal16));
1529    }
1530
1531    #[test]
1532    fn test_variant_all_subtypes_debug() {
1533        use crate::VariantBuilder;
1534
1535        let mut builder = VariantBuilder::new();
1536
1537        // Create a root object that contains one of every variant subtype
1538        let mut root_obj = builder.new_object();
1539
1540        // Add primitive types
1541        root_obj.insert("null", ());
1542        root_obj.insert("boolean_true", true);
1543        root_obj.insert("boolean_false", false);
1544        root_obj.insert("int8", 42i8);
1545        root_obj.insert("int16", 1234i16);
1546        root_obj.insert("int32", 123456i32);
1547        root_obj.insert("int64", 1234567890123456789i64);
1548        root_obj.insert("float", 1.234f32);
1549        root_obj.insert("double", 1.23456789f64);
1550
1551        // Add date and timestamp types
1552        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
1553        root_obj.insert("date", date);
1554
1555        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1556            .unwrap()
1557            .and_hms_milli_opt(15, 30, 45, 123)
1558            .unwrap()
1559            .and_utc();
1560        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));
1561
1562        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1563            .unwrap()
1564            .and_hms_milli_opt(15, 30, 45, 123)
1565            .unwrap();
1566        root_obj.insert(
1567            "timestamp_ntz_micros",
1568            Variant::TimestampNtzMicros(timestamp_ntz),
1569        );
1570
1571        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
1572            .unwrap()
1573            .and_hms_nano_opt(12, 3, 4, 123456789)
1574            .unwrap()
1575            .and_utc();
1576        root_obj.insert(
1577            "timestamp_nanos",
1578            Variant::TimestampNanos(timestamp_nanos_utc),
1579        );
1580
1581        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
1582            .unwrap()
1583            .and_hms_nano_opt(12, 3, 4, 123456789)
1584            .unwrap();
1585        root_obj.insert(
1586            "timestamp_ntz_nanos",
1587            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
1588        );
1589
1590        // Add decimal types
1591        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
1592        root_obj.insert("decimal4", decimal4);
1593
1594        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
1595        root_obj.insert("decimal8", decimal8);
1596
1597        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
1598        root_obj.insert("decimal16", decimal16);
1599
1600        // Add binary and string types
1601        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
1602        root_obj.insert("binary", binary_data.as_slice());
1603
1604        let long_string =
1605            "This is a long string that exceeds the short string limit and contains emoji 🦀";
1606        root_obj.insert("string", long_string);
1607        root_obj.insert("short_string", "Short string with emoji 🎉");
1608        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1609        root_obj.insert("time", time);
1610
1611        // Add uuid
1612        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
1613        root_obj.insert("uuid", Variant::Uuid(uuid));
1614
1615        // Add nested object
1616        let mut nested_obj = root_obj.new_object("nested_object");
1617        nested_obj.insert("inner_key1", "inner_value1");
1618        nested_obj.insert("inner_key2", 999i32);
1619        nested_obj.finish();
1620
1621        // Add list with mixed types
1622        let mut mixed_list = root_obj.new_list("mixed_list");
1623        mixed_list.append_value(1i32);
1624        mixed_list.append_value("two");
1625        mixed_list.append_value(true);
1626        mixed_list.append_value(4.0f32);
1627        mixed_list.append_value(());
1628
1629        // Add nested list inside the mixed list
1630        let mut nested_list = mixed_list.new_list();
1631        nested_list.append_value("nested");
1632        nested_list.append_value(10i8);
1633        nested_list.finish();
1634
1635        mixed_list.finish();
1636
1637        root_obj.finish();
1638
1639        let (metadata, value) = builder.finish();
1640        let variant = Variant::try_new(&metadata, &value).unwrap();
1641
1642        // Test Debug formatter (?)
1643        let debug_output = format!("{:?}", variant);
1644
1645        // Verify that the debug output contains all the expected types
1646        assert!(debug_output.contains("\"null\": Null"));
1647        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
1648        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
1649        assert!(debug_output.contains("\"int8\": Int8(42)"));
1650        assert!(debug_output.contains("\"int16\": Int16(1234)"));
1651        assert!(debug_output.contains("\"int32\": Int32(123456)"));
1652        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
1653        assert!(debug_output.contains("\"float\": Float(1.234)"));
1654        assert!(debug_output.contains("\"double\": Double(1.23456789"));
1655        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
1656        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
1657        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
1658        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
1659        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
1660        assert!(debug_output.contains("\"decimal4\": Decimal4("));
1661        assert!(debug_output.contains("\"decimal8\": Decimal8("));
1662        assert!(debug_output.contains("\"decimal16\": Decimal16("));
1663        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
1664        assert!(debug_output.contains("\"string\": String("));
1665        assert!(debug_output.contains("\"short_string\": ShortString("));
1666        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
1667        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
1668        assert!(debug_output.contains("\"nested_object\":"));
1669        assert!(debug_output.contains("\"mixed_list\":"));
1670
1671        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
1672        assert_eq!(debug_output, expected);
1673
1674        // Test alternate Debug formatter (#?)
1675        let alt_debug_output = format!("{:#?}", variant);
1676        let expected = r#"{
1677    "binary": Binary(01 02 03 04 de ad be ef),
1678    "boolean_false": BooleanFalse,
1679    "boolean_true": BooleanTrue,
1680    "date": Date(
1681        2024-12-25,
1682    ),
1683    "decimal16": Decimal16(
1684        VariantDecimal16 {
1685            integer: 123456789012345678901234567890,
1686            scale: 4,
1687        },
1688    ),
1689    "decimal4": Decimal4(
1690        VariantDecimal4 {
1691            integer: 1234,
1692            scale: 2,
1693        },
1694    ),
1695    "decimal8": Decimal8(
1696        VariantDecimal8 {
1697            integer: 123456789,
1698            scale: 3,
1699        },
1700    ),
1701    "double": Double(
1702        1.23456789,
1703    ),
1704    "float": Float(
1705        1.234,
1706    ),
1707    "int16": Int16(
1708        1234,
1709    ),
1710    "int32": Int32(
1711        123456,
1712    ),
1713    "int64": Int64(
1714        1234567890123456789,
1715    ),
1716    "int8": Int8(
1717        42,
1718    ),
1719    "mixed_list": [
1720        Int32(
1721            1,
1722        ),
1723        ShortString(
1724            ShortString(
1725                "two",
1726            ),
1727        ),
1728        BooleanTrue,
1729        Float(
1730            4.0,
1731        ),
1732        Null,
1733        [
1734            ShortString(
1735                ShortString(
1736                    "nested",
1737                ),
1738            ),
1739            Int8(
1740                10,
1741            ),
1742        ],
1743    ],
1744    "nested_object": {
1745        "inner_key1": ShortString(
1746            ShortString(
1747                "inner_value1",
1748            ),
1749        ),
1750        "inner_key2": Int32(
1751            999,
1752        ),
1753    },
1754    "null": Null,
1755    "short_string": ShortString(
1756        ShortString(
1757            "Short string with emoji 🎉",
1758        ),
1759    ),
1760    "string": String(
1761        "This is a long string that exceeds the short string limit and contains emoji 🦀",
1762    ),
1763    "time": Time(
1764        01:02:03.000004,
1765    ),
1766    "timestamp_micros": TimestampMicros(
1767        2024-12-25T15:30:45.123Z,
1768    ),
1769    "timestamp_nanos": TimestampNanos(
1770        2025-08-15T12:03:04.123456789Z,
1771    ),
1772    "timestamp_ntz_micros": TimestampNtzMicros(
1773        2024-12-25T15:30:45.123,
1774    ),
1775    "timestamp_ntz_nanos": TimestampNtzNanos(
1776        2025-08-15T12:03:04.123456789,
1777    ),
1778    "uuid": Uuid(
1779        67e55044-10b1-426f-9247-bb680e5fe0c8,
1780    ),
1781}"#;
1782        assert_eq!(alt_debug_output, expected);
1783    }
1784}