parquet_variant/
variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{
18    self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
19};
20use crate::utils::{array_from_slice, first_byte_from_slice, slice_from_slice, string_from_slice};
21use arrow_schema::ArrowError;
22use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
23use std::{num::TryFromIntError, ops::Range};
24
/// Width, in bytes, of a single offset or size field.
///
/// Each discriminant equals the byte count it names, so `self as usize` yields the
/// field width directly.
#[derive(Debug, Clone, Copy, PartialEq)]
enum OffsetSizeBytes {
    /// Fields are one byte wide.
    One = 1,
    /// Fields are two bytes wide.
    Two = 2,
    /// Fields are three bytes wide.
    Three = 3,
    /// Fields are four bytes wide.
    Four = 4,
}
32
33impl OffsetSizeBytes {
34    /// Build from the `offset_size_minus_one` bits (see spec).
35    fn try_new(offset_size_minus_one: u8) -> Result<Self, ArrowError> {
36        use OffsetSizeBytes::*;
37        let result = match offset_size_minus_one {
38            0 => One,
39            1 => Two,
40            2 => Three,
41            3 => Four,
42            _ => {
43                return Err(ArrowError::InvalidArgumentError(
44                    "offset_size_minus_one must be 0–3".to_string(),
45                ))
46            }
47        };
48        Ok(result)
49    }
50
51    /// Return one unsigned little-endian value from `bytes`.
52    ///
53    /// * `bytes` – the Variant-metadata buffer.
54    /// * `byte_offset` – number of bytes to skip **before** reading the first
55    ///   value (usually `1` to move past the header byte).
56    /// * `offset_index` – 0-based index **after** the skip
57    ///   (`0` is the first value, `1` the next, …).
58    ///
59    /// Each value is `self as usize` bytes wide (1, 2, 3 or 4).
60    /// Three-byte values are zero-extended to 32 bits before the final
61    /// fallible cast to `usize`.
62    fn unpack_usize(
63        &self,
64        bytes: &[u8],
65        byte_offset: usize,  // how many bytes to skip
66        offset_index: usize, // which offset in an array of offsets
67    ) -> Result<usize, ArrowError> {
68        use OffsetSizeBytes::*;
69        let offset = byte_offset + (*self as usize) * offset_index;
70        let result = match self {
71            One => u8::from_le_bytes(array_from_slice(bytes, offset)?).into(),
72            Two => u16::from_le_bytes(array_from_slice(bytes, offset)?).into(),
73            Three => {
74                // Let's grab the three byte le-chunk first
75                let b3_chunks: [u8; 3] = array_from_slice(bytes, offset)?;
76                // Let's pad it and construct a padded u32 from it.
77                let mut buf = [0u8; 4];
78                buf[..3].copy_from_slice(&b3_chunks);
79                u32::from_le_bytes(buf)
80                    .try_into()
81                    .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?
82            }
83            Four => u32::from_le_bytes(array_from_slice(bytes, offset)?)
84                .try_into()
85                .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?,
86        };
87        Ok(result)
88    }
89}
90
91#[derive(Clone, Debug, Copy, PartialEq)]
92pub struct VariantMetadataHeader {
93    version: u8,
94    is_sorted: bool,
95    /// Note: This is `offset_size_minus_one` + 1
96    offset_size: OffsetSizeBytes,
97}
98
/// The only metadata version the spec currently allows.
///
/// Kept as a named constant so the validation in the header parser is explicit about
/// which value it expects.
const CORRECT_VERSION_VALUE: u8 = 1;
102
103impl VariantMetadataHeader {
104    /// Tries to construct the variant metadata header, which has the form
105    ///              7     6  5   4  3             0
106    ///             +-------+---+---+---------------+
107    /// header      |       |   |   |    version    |
108    ///             +-------+---+---+---------------+
109    ///                 ^         ^
110    ///                 |         +-- sorted_strings
111    ///                 +-- offset_size_minus_one
112    /// The version is a 4-bit value that must always contain the value 1.
113    /// - sorted_strings is a 1-bit value indicating whether dictionary strings are sorted and unique.
114    /// - offset_size_minus_one is a 2-bit value providing the number of bytes per dictionary size and offset field.
115    /// - The actual number of bytes, offset_size, is offset_size_minus_one + 1
116    pub fn try_new(bytes: &[u8]) -> Result<Self, ArrowError> {
117        let header = first_byte_from_slice(bytes)?;
118
119        let version = header & 0x0F; // First four bits
120        if version != CORRECT_VERSION_VALUE {
121            let err_msg = format!(
122                "The version bytes in the header is not {CORRECT_VERSION_VALUE}, got {:b}",
123                version
124            );
125            return Err(ArrowError::InvalidArgumentError(err_msg));
126        }
127        let is_sorted = (header & 0x10) != 0; // Fifth bit
128        let offset_size_minus_one = header >> 6; // Last two bits
129        Ok(Self {
130            version,
131            is_sorted,
132            offset_size: OffsetSizeBytes::try_new(offset_size_minus_one)?,
133        })
134    }
135}
136
137#[derive(Clone, Copy, Debug, PartialEq)]
138/// Encodes the Variant Metadata, see the Variant spec file for more information
139pub struct VariantMetadata<'m> {
140    bytes: &'m [u8],
141    header: VariantMetadataHeader,
142    dict_size: usize,
143    dictionary_key_start_byte: usize,
144}
145
146impl<'m> VariantMetadata<'m> {
147    /// View the raw bytes (needed by very low-level decoders)
148    #[inline]
149    pub const fn as_bytes(&self) -> &'m [u8] {
150        self.bytes
151    }
152
153    pub fn try_new(bytes: &'m [u8]) -> Result<Self, ArrowError> {
154        let header = VariantMetadataHeader::try_new(bytes)?;
155        // Offset 1, index 0 because first element after header is dictionary size
156        let dict_size = header.offset_size.unpack_usize(bytes, 1, 0)?;
157
158        // Check that we have the correct metadata length according to dictionary_size, or return
159        // error early.
160        // Minimum number of bytes the metadata buffer must contain:
161        // 1 byte header
162        // + offset_size-byte `dictionary_size` field
163        // + (dict_size + 1) offset entries, each `offset_size` bytes. (Table size, essentially)
164        // 1 + offset_size + (dict_size + 1) * offset_size
165        // = (dict_size + 2) * offset_size + 1
166        let offset_size = header.offset_size as usize; // Cheap to copy
167
168        let dictionary_key_start_byte = dict_size
169            .checked_add(2)
170            .and_then(|n| n.checked_mul(offset_size))
171            .and_then(|n| n.checked_add(1))
172            .ok_or_else(|| ArrowError::InvalidArgumentError("metadata length overflow".into()))?;
173
174        if bytes.len() < dictionary_key_start_byte {
175            return Err(ArrowError::InvalidArgumentError(
176                "Metadata shorter than dictionary_size implies".to_string(),
177            ));
178        }
179
180        // Check that all offsets are monotonically increasing
181        let mut offsets = (0..=dict_size).map(|i| header.offset_size.unpack_usize(bytes, 1, i + 1));
182        let Some(Ok(mut end @ 0)) = offsets.next() else {
183            return Err(ArrowError::InvalidArgumentError(
184                "First offset is non-zero".to_string(),
185            ));
186        };
187
188        for offset in offsets {
189            let offset = offset?;
190            if end >= offset {
191                return Err(ArrowError::InvalidArgumentError(
192                    "Offsets are not monotonically increasing".to_string(),
193                ));
194            }
195            end = offset;
196        }
197
198        // Verify the buffer covers the whole dictionary-string section
199        if end > bytes.len() - dictionary_key_start_byte {
200            // `prev` holds the last offset seen still
201            return Err(ArrowError::InvalidArgumentError(
202                "Last offset does not equal dictionary length".to_string(),
203            ));
204        }
205
206        Ok(Self {
207            bytes,
208            header,
209            dict_size,
210            dictionary_key_start_byte,
211        })
212    }
213
214    /// Whether the dictionary keys are sorted and unique
215    pub fn is_sorted(&self) -> bool {
216        self.header.is_sorted
217    }
218
219    /// Get the dictionary size
220    pub fn dictionary_size(&self) -> usize {
221        self.dict_size
222    }
223    pub fn version(&self) -> u8 {
224        self.header.version
225    }
226
227    /// Helper method to get the offset start and end range for a key by index.
228    fn get_offsets_for_key_by(&self, index: usize) -> Result<Range<usize>, ArrowError> {
229        if index >= self.dict_size {
230            return Err(ArrowError::InvalidArgumentError(format!(
231                "Index {} out of bounds for dictionary of length {}",
232                index, self.dict_size
233            )));
234        }
235
236        // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
237        let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
238        Ok(unpack(index)?..unpack(index + 1)?)
239    }
240
241    /// Get a single offset by index
242    pub fn get_offset_by(&self, index: usize) -> Result<usize, ArrowError> {
243        if index >= self.dict_size {
244            return Err(ArrowError::InvalidArgumentError(format!(
245                "Index {} out of bounds for dictionary of length {}",
246                index, self.dict_size
247            )));
248        }
249
250        // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
251        let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
252        unpack(index)
253    }
254
255    /// Get the key-name by index
256    pub fn get_field_by(&self, index: usize) -> Result<&'m str, ArrowError> {
257        let offset_range = self.get_offsets_for_key_by(index)?;
258        self.get_field_by_offset(offset_range)
259    }
260
261    /// Gets the field using an offset (Range) - helper method to keep consistent API.
262    pub(crate) fn get_field_by_offset(&self, offset: Range<usize>) -> Result<&'m str, ArrowError> {
263        let dictionary_keys_bytes =
264            slice_from_slice(self.bytes, self.dictionary_key_start_byte..self.bytes.len())?;
265        let result = string_from_slice(dictionary_keys_bytes, offset)?;
266
267        Ok(result)
268    }
269
270    pub fn header(&self) -> VariantMetadataHeader {
271        self.header
272    }
273
274    /// Get the offsets as an iterator
275    pub fn offsets(&self) -> impl Iterator<Item = Result<Range<usize>, ArrowError>> + 'm {
276        let offset_size = self.header.offset_size; // `Copy`
277        let bytes = self.bytes;
278
279        (0..self.dict_size).map(move |i| {
280            // This wont be out of bounds as long as dict_size and offsets have been validated
281            // during construction via `try_new`, as it calls unpack_usize for the
282            // indices `1..dict_size+1` already.
283            let start = offset_size.unpack_usize(bytes, 1, i + 1);
284            let end = offset_size.unpack_usize(bytes, 1, i + 2);
285
286            match (start, end) {
287                (Ok(s), Ok(e)) => Ok(s..e),
288                (Err(e), _) | (_, Err(e)) => Err(e),
289            }
290        })
291    }
292
293    /// Get all key-names as an Iterator of strings
294    pub fn fields(
295        &'m self,
296    ) -> Result<impl Iterator<Item = Result<&'m str, ArrowError>>, ArrowError> {
297        let iterator = self
298            .offsets()
299            .map(move |offset_range| self.get_field_by_offset(offset_range?));
300        Ok(iterator)
301    }
302}
303
304#[derive(Clone, Copy, Debug, PartialEq)]
305pub struct VariantObject<'m, 'v> {
306    pub metadata: &'m VariantMetadata<'m>,
307    pub value_metadata: u8,
308    pub value_data: &'v [u8],
309}
310impl<'m, 'v> VariantObject<'m, 'v> {
311    pub fn fields(&self) -> Result<impl Iterator<Item = (&'m str, Variant<'m, 'v>)>, ArrowError> {
312        todo!();
313        #[allow(unreachable_code)] // Just to infer the return type
314        Ok(vec![].into_iter())
315    }
316    pub fn field(&self, _name: &'m str) -> Result<Variant<'m, 'v>, ArrowError> {
317        todo!()
318    }
319}
320
321#[derive(Clone, Copy, Debug, PartialEq)]
322pub struct VariantArray<'m, 'v> {
323    pub metadata: &'m VariantMetadata<'m>,
324    pub value_metadata: u8,
325    pub value_data: &'v [u8],
326}
327
328impl<'m, 'v> VariantArray<'m, 'v> {
329    /// Return the length of this array
330    pub fn len(&self) -> usize {
331        todo!()
332    }
333
334    /// Is the array of zero length
335    pub fn is_empty(&self) -> bool {
336        self.len() == 0
337    }
338
339    pub fn values(&self) -> Result<impl Iterator<Item = Variant<'m, 'v>>, ArrowError> {
340        todo!();
341        #[allow(unreachable_code)] // Just to infer the return type
342        Ok(vec![].into_iter())
343    }
344
345    pub fn get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
346        // The 6 first bits to the left are the value_header and the 2 bits
347        // to the right are the basic type, so we shift to get only the value_header
348        let value_header = self.value_metadata >> 2;
349        let is_large = (value_header & 0x04) != 0; // 3rd bit from the right
350        let field_offset_size_minus_one = value_header & 0x03; // Last two bits
351        let offset_size = OffsetSizeBytes::try_new(field_offset_size_minus_one)?;
352        // The size of the num_elements entry in the array value_data is 4 bytes if
353        // is_large is true, otherwise 1 byte.
354        let num_elements_size = match is_large {
355            true => OffsetSizeBytes::Four,
356            false => OffsetSizeBytes::One,
357        };
358        // Read the num_elements
359        // The size of the num_elements entry in the array value_data is 4 bytes if
360        // is_large is true, otherwise 1 byte.
361        let num_elements = num_elements_size.unpack_usize(self.value_data, 0, 0)?;
362        let first_offset_byte = num_elements_size as usize;
363
364        let overflow =
365            || ArrowError::InvalidArgumentError("Variant value_byte_length overflow".into());
366
367        // 1.  num_elements + 1
368        let n_offsets = num_elements.checked_add(1).ok_or_else(overflow)?;
369
370        // 2.  (num_elements + 1) * offset_size
371        let value_bytes = n_offsets
372            .checked_mul(offset_size as usize)
373            .ok_or_else(overflow)?;
374
375        // 3.  first_offset_byte + ...
376        let first_value_byte = first_offset_byte
377            .checked_add(value_bytes)
378            .ok_or_else(overflow)?;
379
380        // Skip num_elements bytes to read the offsets
381        let start_field_offset_from_first_value_byte =
382            offset_size.unpack_usize(self.value_data, first_offset_byte, index)?;
383        let end_field_offset_from_first_value_byte =
384            offset_size.unpack_usize(self.value_data, first_offset_byte, index + 1)?;
385
386        // Read the value bytes from the offsets
387        let variant_value_bytes = slice_from_slice(
388            self.value_data,
389            first_value_byte + start_field_offset_from_first_value_byte
390                ..first_value_byte + end_field_offset_from_first_value_byte,
391        )?;
392        let variant = Variant::try_new(self.metadata, variant_value_bytes)?;
393        Ok(variant)
394    }
395}
396
397// impl<'m, 'v> Index<usize> for VariantArray<'m, 'v> {
398//     type Output = Variant<'m, 'v>;
399//
400// }
401
402/// Variant value. May contain references to metadata and value
403#[derive(Clone, Debug, Copy, PartialEq)]
404pub enum Variant<'m, 'v> {
405    // TODO: Add types for the rest of the primitive types, once API is agreed upon
406    Null,
407    Int8(i8),
408    Int16(i16),
409    Int32(i32),
410    Int64(i64),
411    Date(NaiveDate),
412    TimestampMicros(DateTime<Utc>),
413    TimestampNtzMicros(NaiveDateTime),
414    Decimal4 { integer: i32, scale: u8 },
415    Decimal8 { integer: i64, scale: u8 },
416    Decimal16 { integer: i128, scale: u8 },
417    Float(f32),
418    Double(f64),
419    BooleanTrue,
420    BooleanFalse,
421
422    // Note: only need the *value* buffer
423    Binary(&'v [u8]),
424    String(&'v str),
425    ShortString(&'v str),
426
427    // need both metadata & value
428    Object(VariantObject<'m, 'v>),
429    Array(VariantArray<'m, 'v>),
430}
431
432impl<'m, 'v> Variant<'m, 'v> {
433    /// Parse the buffers and return the appropriate variant.
434    pub fn try_new(metadata: &'m VariantMetadata, value: &'v [u8]) -> Result<Self, ArrowError> {
435        let value_metadata = *first_byte_from_slice(value)?;
436        let value_data = slice_from_slice(value, 1..)?;
437        let new_self = match get_basic_type(value_metadata)? {
438            VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
439                VariantPrimitiveType::Null => Variant::Null,
440                VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
441                VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
442                VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
443                VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
444                VariantPrimitiveType::Decimal4 => {
445                    let (integer, scale) = decoder::decode_decimal4(value_data)?;
446                    Variant::Decimal4 { integer, scale }
447                }
448                VariantPrimitiveType::Decimal8 => {
449                    let (integer, scale) = decoder::decode_decimal8(value_data)?;
450                    Variant::Decimal8 { integer, scale }
451                }
452                VariantPrimitiveType::Decimal16 => {
453                    let (integer, scale) = decoder::decode_decimal16(value_data)?;
454                    Variant::Decimal16 { integer, scale }
455                }
456                VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
457                VariantPrimitiveType::Double => {
458                    Variant::Double(decoder::decode_double(value_data)?)
459                }
460                VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
461                VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
462                // TODO: Add types for the rest, once API is agreed upon
463                VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
464                VariantPrimitiveType::TimestampMicros => {
465                    Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
466                }
467                VariantPrimitiveType::TimestampNtzMicros => {
468                    Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
469                }
470                VariantPrimitiveType::Binary => {
471                    Variant::Binary(decoder::decode_binary(value_data)?)
472                }
473                VariantPrimitiveType::String => {
474                    Variant::String(decoder::decode_long_string(value_data)?)
475                }
476            },
477            VariantBasicType::ShortString => {
478                Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
479            }
480            VariantBasicType::Object => Variant::Object(VariantObject {
481                metadata,
482                value_metadata,
483                value_data,
484            }),
485            VariantBasicType::Array => Variant::Array(VariantArray {
486                metadata,
487                value_metadata,
488                value_data,
489            }),
490        };
491        Ok(new_self)
492    }
493
494    /// Converts this variant to `()` if it is null.
495    ///
496    /// Returns `Some(())` for null variants,
497    /// `None` for non-null variants.
498    ///
499    /// # Examples
500    ///
501    /// ```
502    /// use parquet_variant::Variant;
503    ///
504    /// // you can extract `()` from a null variant
505    /// let v1 = Variant::from(());
506    /// assert_eq!(v1.as_null(), Some(()));
507    ///
508    /// // but not from other variants
509    /// let v2 = Variant::from("hello!");
510    /// assert_eq!(v2.as_null(), None);
511    /// ```
512    pub fn as_null(&self) -> Option<()> {
513        matches!(self, Variant::Null).then_some(())
514    }
515
516    /// Converts this variant to a `bool` if possible.
517    ///
518    /// Returns `Some(bool)` for boolean variants,
519    /// `None` for non-boolean variants.
520    ///
521    /// # Examples
522    ///
523    /// ```
524    /// use parquet_variant::Variant;
525    ///
526    /// // you can extract a bool from the true variant
527    /// let v1 = Variant::from(true);
528    /// assert_eq!(v1.as_boolean(), Some(true));
529    ///
530    /// // and the false variant
531    /// let v2 = Variant::from(false);
532    /// assert_eq!(v2.as_boolean(), Some(false));
533    ///
534    /// // but not from other variants
535    /// let v3 = Variant::from("hello!");
536    /// assert_eq!(v3.as_boolean(), None);
537    /// ```
538    pub fn as_boolean(&self) -> Option<bool> {
539        match self {
540            Variant::BooleanTrue => Some(true),
541            Variant::BooleanFalse => Some(false),
542            _ => None,
543        }
544    }
545
546    /// Converts this variant to a `NaiveDate` if possible.
547    ///
548    /// Returns `Some(NaiveDate)` for date variants,
549    /// `None` for non-date variants.
550    ///
551    /// # Examples
552    ///
553    /// ```
554    /// use parquet_variant::Variant;
555    /// use chrono::NaiveDate;
556    ///
557    /// // you can extract a NaiveDate from a date variant
558    /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
559    /// let v1 = Variant::from(date);
560    /// assert_eq!(v1.as_naive_date(), Some(date));
561    ///
562    /// // but not from other variants
563    /// let v2 = Variant::from("hello!");
564    /// assert_eq!(v2.as_naive_date(), None);
565    /// ```
566    pub fn as_naive_date(&self) -> Option<NaiveDate> {
567        if let Variant::Date(d) = self {
568            Some(*d)
569        } else {
570            None
571        }
572    }
573
574    /// Converts this variant to a `DateTime<Utc>` if possible.
575    ///
576    /// Returns `Some(DateTime<Utc>)` for timestamp variants,
577    /// `None` for non-timestamp variants.
578    ///
579    /// # Examples
580    ///
581    /// ```
582    /// use parquet_variant::Variant;
583    /// use chrono::NaiveDate;
584    ///
585    /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
586    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
587    /// let v1 = Variant::from(datetime);
588    /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
589    ///
590    /// // or a non-UTC-adjusted variant
591    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
592    /// let v2 = Variant::from(datetime);
593    /// assert_eq!(v2.as_datetime_utc(), Some(datetime.and_utc()));
594    ///
595    /// // but not from other variants
596    /// let v3 = Variant::from("hello!");
597    /// assert_eq!(v3.as_datetime_utc(), None);
598    /// ```
599    pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
600        match *self {
601            Variant::TimestampMicros(d) => Some(d),
602            Variant::TimestampNtzMicros(d) => Some(d.and_utc()),
603            _ => None,
604        }
605    }
606
607    /// Converts this variant to a `NaiveDateTime` if possible.
608    ///
609    /// Returns `Some(NaiveDateTime)` for timestamp variants,
610    /// `None` for non-timestamp variants.
611    ///
612    /// # Examples
613    ///
614    /// ```
615    /// use parquet_variant::Variant;
616    /// use chrono::NaiveDate;
617    ///
618    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
619    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
620    /// let v1 = Variant::from(datetime);
621    /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
622    ///
623    /// // or a UTC-adjusted variant
624    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
625    /// let v2 = Variant::from(datetime);
626    /// assert_eq!(v2.as_naive_datetime(), Some(datetime.naive_utc()));
627    ///
628    /// // but not from other variants
629    /// let v3 = Variant::from("hello!");
630    /// assert_eq!(v3.as_naive_datetime(), None);
631    /// ```
632    pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
633        match *self {
634            Variant::TimestampNtzMicros(d) => Some(d),
635            Variant::TimestampMicros(d) => Some(d.naive_utc()),
636            _ => None,
637        }
638    }
639
640    /// Converts this variant to a `&[u8]` if possible.
641    ///
642    /// Returns `Some(&[u8])` for binary variants,
643    /// `None` for non-binary variants.
644    ///
645    /// # Examples
646    ///
647    /// ```
648    /// use parquet_variant::Variant;
649    ///
650    /// // you can extract a byte slice from a binary variant
651    /// let data = b"hello!";
652    /// let v1 = Variant::Binary(data);
653    /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
654    ///
655    /// // but not from other variant types
656    /// let v2 = Variant::from(123i64);
657    /// assert_eq!(v2.as_u8_slice(), None);
658    /// ```
659    pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
660        if let Variant::Binary(d) = self {
661            Some(d)
662        } else {
663            None
664        }
665    }
666
667    /// Converts this variant to a `&str` if possible.
668    ///
669    /// Returns `Some(&str)` for string variants (both regular and short strings),
670    /// `None` for non-string variants.
671    ///
672    /// # Examples
673    ///
674    /// ```
675    /// use parquet_variant::Variant;
676    ///
677    /// // you can extract a string from string variants
678    /// let s = "hello!";
679    /// let v1 = Variant::ShortString(s);
680    /// assert_eq!(v1.as_string(), Some(s));
681    ///
682    /// // but not from other variants
683    /// let v2 = Variant::from(123i64);
684    /// assert_eq!(v2.as_string(), None);
685    /// ```
686    pub fn as_string(&'v self) -> Option<&'v str> {
687        match self {
688            Variant::String(s) | Variant::ShortString(s) => Some(s),
689            _ => None,
690        }
691    }
692
693    /// Converts this variant to an `i8` if possible.
694    ///
695    /// Returns `Some(i8)` for integer variants that fit in `i8` range,
696    /// `None` for non-integer variants or values that would overflow.
697    ///
698    /// # Examples
699    ///
700    /// ```
701    /// use parquet_variant::Variant;
702    ///
703    /// // you can read an int64 variant into an i8 if it fits
704    /// let v1 = Variant::from(123i64);
705    /// assert_eq!(v1.as_int8(), Some(123i8));
706    ///
707    /// // but not if it would overflow
708    /// let v2 = Variant::from(1234i64);
709    /// assert_eq!(v2.as_int8(), None);
710    ///
711    /// // or if the variant cannot be cast into an integer
712    /// let v3 = Variant::from("hello!");
713    /// assert_eq!(v3.as_int8(), None);
714    /// ```
715    pub fn as_int8(&self) -> Option<i8> {
716        match *self {
717            Variant::Int8(i) => Some(i),
718            Variant::Int16(i) => i.try_into().ok(),
719            Variant::Int32(i) => i.try_into().ok(),
720            Variant::Int64(i) => i.try_into().ok(),
721            _ => None,
722        }
723    }
724
725    /// Converts this variant to an `i16` if possible.
726    ///
727    /// Returns `Some(i16)` for integer variants that fit in `i16` range,
728    /// `None` for non-integer variants or values that would overflow.
729    ///
730    /// # Examples
731    ///
732    /// ```
733    /// use parquet_variant::Variant;
734    ///
735    /// // you can read an int64 variant into an i16 if it fits
736    /// let v1 = Variant::from(123i64);
737    /// assert_eq!(v1.as_int16(), Some(123i16));
738    ///
739    /// // but not if it would overflow
740    /// let v2 = Variant::from(123456i64);
741    /// assert_eq!(v2.as_int16(), None);
742    ///
743    /// // or if the variant cannot be cast into an integer
744    /// let v3 = Variant::from("hello!");
745    /// assert_eq!(v3.as_int16(), None);
746    /// ```
747    pub fn as_int16(&self) -> Option<i16> {
748        match *self {
749            Variant::Int8(i) => Some(i.into()),
750            Variant::Int16(i) => Some(i),
751            Variant::Int32(i) => i.try_into().ok(),
752            Variant::Int64(i) => i.try_into().ok(),
753            _ => None,
754        }
755    }
756
757    /// Converts this variant to an `i32` if possible.
758    ///
759    /// Returns `Some(i32)` for integer variants that fit in `i32` range,
760    /// `None` for non-integer variants or values that would overflow.
761    ///
762    /// # Examples
763    ///
764    /// ```
765    /// use parquet_variant::Variant;
766    ///
767    /// // you can read an int64 variant into an i32 if it fits
768    /// let v1 = Variant::from(123i64);
769    /// assert_eq!(v1.as_int32(), Some(123i32));
770    ///
771    /// // but not if it would overflow
772    /// let v2 = Variant::from(12345678901i64);
773    /// assert_eq!(v2.as_int32(), None);
774    ///
775    /// // or if the variant cannot be cast into an integer
776    /// let v3 = Variant::from("hello!");
777    /// assert_eq!(v3.as_int32(), None);
778    /// ```
779    pub fn as_int32(&self) -> Option<i32> {
780        match *self {
781            Variant::Int8(i) => Some(i.into()),
782            Variant::Int16(i) => Some(i.into()),
783            Variant::Int32(i) => Some(i),
784            Variant::Int64(i) => i.try_into().ok(),
785            _ => None,
786        }
787    }
788
789    /// Converts this variant to an `i64` if possible.
790    ///
791    /// Returns `Some(i64)` for integer variants that fit in `i64` range,
792    /// `None` for non-integer variants or values that would overflow.
793    ///
794    /// # Examples
795    ///
796    /// ```
797    /// use parquet_variant::Variant;
798    ///
799    /// // you can read an int64 variant into an i64
800    /// let v1 = Variant::from(123i64);
801    /// assert_eq!(v1.as_int64(), Some(123i64));
802    ///
803    /// // but not a variant that cannot be cast into an integer
804    /// let v2 = Variant::from("hello!");
805    /// assert_eq!(v2.as_int64(), None);
806    /// ```
807    pub fn as_int64(&self) -> Option<i64> {
808        match *self {
809            Variant::Int8(i) => Some(i.into()),
810            Variant::Int16(i) => Some(i.into()),
811            Variant::Int32(i) => Some(i.into()),
812            Variant::Int64(i) => Some(i),
813            _ => None,
814        }
815    }
816
817    /// Converts this variant to tuple with a 4-byte unscaled value if possible.
818    ///
819    /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
820    /// fits in `i32` range,
821    /// `None` for non-decimal variants or decimal values that would overflow.
822    ///
823    /// # Examples
824    ///
825    /// ```
826    /// use parquet_variant::Variant;
827    ///
828    /// // you can extract decimal parts from smaller or equally-sized decimal variants
829    /// let v1 = Variant::from((1234_i32, 2));
830    /// assert_eq!(v1.as_decimal_int32(), Some((1234_i32, 2)));
831    ///
832    /// // and from larger decimal variants if they fit
833    /// let v2 = Variant::from((1234_i64, 2));
834    /// assert_eq!(v2.as_decimal_int32(), Some((1234_i32, 2)));
835    ///
836    /// // but not if the value would overflow i32
837    /// let v3 = Variant::from((12345678901i64, 2));
838    /// assert_eq!(v3.as_decimal_int32(), None);
839    ///
840    /// // or if the variant is not a decimal
841    /// let v4 = Variant::from("hello!");
842    /// assert_eq!(v4.as_decimal_int32(), None);
843    /// ```
844    pub fn as_decimal_int32(&self) -> Option<(i32, u8)> {
845        match *self {
846            Variant::Decimal4 { integer, scale } => Some((integer, scale)),
847            Variant::Decimal8 { integer, scale } => {
848                if let Ok(converted_integer) = integer.try_into() {
849                    Some((converted_integer, scale))
850                } else {
851                    None
852                }
853            }
854            Variant::Decimal16 { integer, scale } => {
855                if let Ok(converted_integer) = integer.try_into() {
856                    Some((converted_integer, scale))
857                } else {
858                    None
859                }
860            }
861            _ => None,
862        }
863    }
864
865    /// Converts this variant to tuple with an 8-byte unscaled value if possible.
866    ///
867    /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
868    /// fits in `i64` range,
869    /// `None` for non-decimal variants or decimal values that would overflow.
870    ///
871    /// # Examples
872    ///
873    /// ```
874    /// use parquet_variant::Variant;
875    ///
876    /// // you can extract decimal parts from smaller or equally-sized decimal variants
877    /// let v1 = Variant::from((1234_i64, 2));
878    /// assert_eq!(v1.as_decimal_int64(), Some((1234_i64, 2)));
879    ///
880    /// // and from larger decimal variants if they fit
881    /// let v2 = Variant::from((1234_i128, 2));
882    /// assert_eq!(v2.as_decimal_int64(), Some((1234_i64, 2)));
883    ///
884    /// // but not if the value would overflow i64
885    /// let v3 = Variant::from((2e19 as i128, 2));
886    /// assert_eq!(v3.as_decimal_int64(), None);
887    ///
888    /// // or if the variant is not a decimal
889    /// let v4 = Variant::from("hello!");
890    /// assert_eq!(v4.as_decimal_int64(), None);
891    /// ```
892    pub fn as_decimal_int64(&self) -> Option<(i64, u8)> {
893        match *self {
894            Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
895            Variant::Decimal8 { integer, scale } => Some((integer, scale)),
896            Variant::Decimal16 { integer, scale } => {
897                if let Ok(converted_integer) = integer.try_into() {
898                    Some((converted_integer, scale))
899                } else {
900                    None
901                }
902            }
903            _ => None,
904        }
905    }
906
907    /// Converts this variant to tuple with a 16-byte unscaled value if possible.
908    ///
909    /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
910    /// fits in `i128` range,
911    /// `None` for non-decimal variants or decimal values that would overflow.
912    ///
913    /// # Examples
914    ///
915    /// ```
916    /// use parquet_variant::Variant;
917    ///
918    /// // you can extract decimal parts from smaller or equally-sized decimal variants
919    /// let v1 = Variant::from((1234_i128, 2));
920    /// assert_eq!(v1.as_decimal_int128(), Some((1234_i128, 2)));
921    ///
922    /// // but not if the variant is not a decimal
923    /// let v2 = Variant::from("hello!");
924    /// assert_eq!(v2.as_decimal_int128(), None);
925    /// ```
926    pub fn as_decimal_int128(&self) -> Option<(i128, u8)> {
927        match *self {
928            Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
929            Variant::Decimal8 { integer, scale } => Some((integer.into(), scale)),
930            Variant::Decimal16 { integer, scale } => Some((integer, scale)),
931            _ => None,
932        }
933    }
934    /// Converts this variant to an `f32` if possible.
935    ///
936    /// Returns `Some(f32)` for float and double variants,
937    /// `None` for non-floating-point variants.
938    ///
939    /// # Examples
940    ///
941    /// ```
942    /// use parquet_variant::Variant;
943    ///
944    /// // you can extract an f32 from a float variant
945    /// let v1 = Variant::from(std::f32::consts::PI);
946    /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
947    ///
948    /// // and from a double variant (with loss of precision to nearest f32)
949    /// let v2 = Variant::from(std::f64::consts::PI);
950    /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
951    ///
952    /// // but not from other variants
953    /// let v3 = Variant::from("hello!");
954    /// assert_eq!(v3.as_f32(), None);
955    /// ```
956    #[allow(clippy::cast_possible_truncation)]
957    pub fn as_f32(&self) -> Option<f32> {
958        match *self {
959            Variant::Float(i) => Some(i),
960            Variant::Double(i) => Some(i as f32),
961            _ => None,
962        }
963    }
964
965    /// Converts this variant to an `f64` if possible.
966    ///
967    /// Returns `Some(f64)` for float and double variants,
968    /// `None` for non-floating-point variants.
969    ///
970    /// # Examples
971    ///
972    /// ```
973    /// use parquet_variant::Variant;
974    ///
975    /// // you can extract an f64 from a float variant
976    /// let v1 = Variant::from(std::f32::consts::PI);
977    /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
978    ///
979    /// // and from a double variant
980    /// let v2 = Variant::from(std::f64::consts::PI);
981    /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
982    ///
983    /// // but not from other variants
984    /// let v3 = Variant::from("hello!");
985    /// assert_eq!(v3.as_f64(), None);
986    /// ```
987    pub fn as_f64(&self) -> Option<f64> {
988        match *self {
989            Variant::Float(i) => Some(i.into()),
990            Variant::Double(i) => Some(i),
991            _ => None,
992        }
993    }
994
995    pub fn metadata(&self) -> Option<&'m VariantMetadata> {
996        match self {
997            Variant::Object(VariantObject { metadata, .. })
998            | Variant::Array(VariantArray { metadata, .. }) => Some(*metadata),
999            _ => None,
1000        }
1001    }
1002}
1003
1004impl From<()> for Variant<'_, '_> {
1005    fn from((): ()) -> Self {
1006        Variant::Null
1007    }
1008}
1009
1010impl From<i8> for Variant<'_, '_> {
1011    fn from(value: i8) -> Self {
1012        Variant::Int8(value)
1013    }
1014}
1015
1016impl From<i16> for Variant<'_, '_> {
1017    fn from(value: i16) -> Self {
1018        Variant::Int16(value)
1019    }
1020}
1021
1022impl From<i32> for Variant<'_, '_> {
1023    fn from(value: i32) -> Self {
1024        Variant::Int32(value)
1025    }
1026}
1027
1028impl From<i64> for Variant<'_, '_> {
1029    fn from(value: i64) -> Self {
1030        Variant::Int64(value)
1031    }
1032}
1033
1034impl From<(i32, u8)> for Variant<'_, '_> {
1035    fn from(value: (i32, u8)) -> Self {
1036        Variant::Decimal4 {
1037            integer: value.0,
1038            scale: value.1,
1039        }
1040    }
1041}
1042
1043impl From<(i64, u8)> for Variant<'_, '_> {
1044    fn from(value: (i64, u8)) -> Self {
1045        Variant::Decimal8 {
1046            integer: value.0,
1047            scale: value.1,
1048        }
1049    }
1050}
1051
1052impl From<(i128, u8)> for Variant<'_, '_> {
1053    fn from(value: (i128, u8)) -> Self {
1054        Variant::Decimal16 {
1055            integer: value.0,
1056            scale: value.1,
1057        }
1058    }
1059}
1060
1061impl From<f32> for Variant<'_, '_> {
1062    fn from(value: f32) -> Self {
1063        Variant::Float(value)
1064    }
1065}
1066
1067impl From<f64> for Variant<'_, '_> {
1068    fn from(value: f64) -> Self {
1069        Variant::Double(value)
1070    }
1071}
1072
1073impl From<bool> for Variant<'_, '_> {
1074    fn from(value: bool) -> Self {
1075        if value {
1076            Variant::BooleanTrue
1077        } else {
1078            Variant::BooleanFalse
1079        }
1080    }
1081}
1082
1083impl From<NaiveDate> for Variant<'_, '_> {
1084    fn from(value: NaiveDate) -> Self {
1085        Variant::Date(value)
1086    }
1087}
1088
1089impl From<DateTime<Utc>> for Variant<'_, '_> {
1090    fn from(value: DateTime<Utc>) -> Self {
1091        Variant::TimestampMicros(value)
1092    }
1093}
1094impl From<NaiveDateTime> for Variant<'_, '_> {
1095    fn from(value: NaiveDateTime) -> Self {
1096        Variant::TimestampNtzMicros(value)
1097    }
1098}
1099
1100impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1101    fn from(value: &'v [u8]) -> Self {
1102        Variant::Binary(value)
1103    }
1104}
1105
1106impl<'v> From<&'v str> for Variant<'_, 'v> {
1107    fn from(value: &'v str) -> Self {
1108        if value.len() < 64 {
1109            Variant::ShortString(value)
1110        } else {
1111            Variant::String(value)
1112        }
1113    }
1114}
1115
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `err` is an `ArrowError::InvalidArgumentError` whose
    /// message contains `needle`.
    #[track_caller]
    fn assert_invalid_argument(err: &ArrowError, needle: &str) {
        assert!(
            matches!(err, ArrowError::InvalidArgumentError(msg) if msg.contains(needle)),
            "unexpected error: {err:?}"
        );
    }

    /// `OffsetSizeBytes::try_new` accepts exactly the values 0 through 3.
    #[test]
    fn test_offset() {
        let accepted = [
            (0u8, OffsetSizeBytes::One),
            (1, OffsetSizeBytes::Two),
            (2, OffsetSizeBytes::Three),
            (3, OffsetSizeBytes::Four),
        ];
        for &(raw, width) in &accepted {
            assert_eq!(OffsetSizeBytes::try_new(raw).unwrap(), width);
        }

        // everything outside 0-3 must error
        for raw in [4u8, 255] {
            assert!(OffsetSizeBytes::try_new(raw).is_err());
        }
    }

    /// Reads little-endian values of every supported width from small buffers.
    #[test]
    fn unpack_usize_all_widths() {
        // One-byte offsets
        let one_byte = [0x01u8, 0xAB, 0xCD];
        for &(index, expected) in &[(0, 0x01), (2, 0xCD)] {
            let got = OffsetSizeBytes::One.unpack_usize(&one_byte, 0, index);
            assert_eq!(got.unwrap(), expected);
        }

        // Two-byte offsets (little-endian 0x1234, 0x5678)
        let two_byte = [0x34, 0x12, 0x78, 0x56];
        for &(index, expected) in &[(0, 0x1234), (1, 0x5678)] {
            let got = OffsetSizeBytes::Two.unpack_usize(&two_byte, 0, index);
            assert_eq!(got.unwrap(), expected);
        }

        // Three-byte offsets (0x030201 and 0x0000FF)
        let three_byte = [0x01, 0x02, 0x03, 0xFF, 0x00, 0x00];
        for &(index, expected) in &[(0, 0x030201), (1, 0x0000FF)] {
            let got = OffsetSizeBytes::Three.unpack_usize(&three_byte, 0, index);
            assert_eq!(got.unwrap(), expected);
        }

        // Four-byte offsets (0x12345678, 0x90ABCDEF)
        let four_byte = [0x78, 0x56, 0x34, 0x12, 0xEF, 0xCD, 0xAB, 0x90];
        for &(index, expected) in &[(0, 0x1234_5678), (1, 0x90AB_CDEF)] {
            let got = OffsetSizeBytes::Four.unpack_usize(&four_byte, 0, index);
            assert_eq!(got.unwrap(), expected);
        }
    }

    /// A buffer shorter than the requested width must produce an error.
    #[test]
    fn unpack_usize_out_of_bounds() {
        let single_byte = [0x00u8]; // deliberately too short
        for width in [OffsetSizeBytes::Two, OffsetSizeBytes::Three] {
            assert!(width.unpack_usize(&single_byte, 0, 0).is_err());
        }
    }

    /// Walks a header byte followed by four two-byte values, then reads one
    /// slot past the end.
    #[test]
    fn unpack_simple() {
        let encoded = [
            0x41u8, // header
            0x02, 0x00, // dictionary_size = 2
            0x00, 0x00, // offset[0] = 0
            0x05, 0x00, // offset[1] = 5
            0x09, 0x00, // offset[2] = 9
        ];
        let two_wide = OffsetSizeBytes::Two;

        // Slot 0 is dictionary_size (it starts immediately after the header);
        // slots 1..=3 are the three offsets.
        let expected = [2, 0, 5, 9];
        for (slot, &want) in expected.iter().enumerate() {
            assert_eq!(two_wide.unpack_usize(&encoded, 1, slot).unwrap(), want);
        }

        // One slot further runs off the end of the buffer.
        assert!(two_wide.unpack_usize(&encoded, 1, expected.len()).is_err());
    }

    /// `"cat"`, `"dog"` – valid metadata
    #[test]
    fn try_new_ok_inline() {
        let bytes = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x02,        // dictionary_size (2 strings)
            0x00, 0x03, 0x06, // offsets: "cat" = [0, 3), "dog" = [3, 6)
            b'c', b'a', b't', // "cat"
            b'd', b'o', b'g', // "dog"
        ];

        let md = VariantMetadata::try_new(bytes).expect("should parse");
        assert_eq!(md.dictionary_size(), 2);

        // Fields
        for (index, &name) in ["cat", "dog"].iter().enumerate() {
            assert_eq!(md.get_field_by(index).unwrap(), name);
        }

        // Offsets
        for (index, &start) in [0x00, 0x03].iter().enumerate() {
            assert_eq!(md.get_offset_by(index).unwrap(), start);
        }

        // We only have 2 keys, the final offset should not be accessible using this method.
        let out_of_range = md.get_offset_by(2).unwrap_err();
        assert_invalid_argument(
            &out_of_range,
            "Index 2 out of bounds for dictionary of length 2",
        );

        let listed: Vec<(usize, &str)> = md
            .fields()
            .unwrap()
            .map(|name| name.unwrap())
            .enumerate()
            .collect();
        assert_eq!(listed, vec![(0usize, "cat"), (1usize, "dog")]);
    }

    /// Too short buffer test (the last string byte is missing).
    /// The complete buffer parses; with the final byte removed the last offset
    /// (2) no longer fits the remaining string bytes, and `try_new` must fail
    /// with a "Last offset" error.
    #[test]
    fn try_new_missing_last_value() {
        let complete = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x02,        // dictionary_size = 2
            0x00, 0x01, 0x02, // offsets: "a" = [0, 1), "b" = [1, 2)
            b'a',
            b'b', // <-- we'll remove this
        ];

        let working_md = VariantMetadata::try_new(complete).expect("should parse");
        assert_eq!(working_md.dictionary_size(), 2);
        for (index, &name) in ["a", "b"].iter().enumerate() {
            assert_eq!(working_md.get_field_by(index).unwrap(), name);
        }

        let truncated = &complete[..complete.len() - 1];
        let failure = VariantMetadata::try_new(truncated).unwrap_err();
        assert_invalid_argument(&failure, "Last offset");
    }

    /// Offsets that go backwards must be rejected.
    #[test]
    fn try_new_fails_non_monotonic() {
        // 'cat', 'dog', 'lamb'
        let bytes = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x03,        // dictionary_size
            0x00,
            0x02,
            0x01, // Doesn't increase monotonically
            0x10,
            b'c', b'a', b't', // "cat"
            b'd', b'o', b'g', // "dog"
            b'l', b'a', b'm', b'b', // "lamb"
        ];

        let failure = VariantMetadata::try_new(bytes).unwrap_err();
        assert_invalid_argument(&failure, "monotonically");
    }

    /// A dictionary of size 2 needs three offsets; only two are present.
    #[test]
    fn try_new_truncated_offsets_inline() {
        // Missing final offset
        let bytes = &[0b0000_0001, 0x02, 0x00, 0x01];

        let failure = VariantMetadata::try_new(bytes).unwrap_err();
        assert_invalid_argument(&failure, "shorter");
    }
}