Skip to main content

parquet_variant_compute/
variant_get.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use arrow::{
18    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
19    compute::CastOptions,
20    datatypes::Field,
21    error::Result,
22};
23use arrow_schema::{ArrowError, DataType, FieldRef};
24use parquet_variant::{VariantPath, VariantPathElement};
25
26use crate::VariantArray;
27use crate::variant_array::BorrowedShreddingState;
28use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
29
30use arrow::array::AsArray;
31use std::sync::Arc;
32
/// Outcome of trying to take a single path step through the shredded (`typed_value`) part of a
/// variant column; produced by `follow_shredded_path_element`.
pub(crate) enum ShreddedPathStep<'a> {
    /// Path step succeeded, return the new shredding state
    Success(BorrowedShreddingState<'a>),
    /// The path element is not present in the `typed_value` column and there is no `value` column,
    /// so we know it does not exist. It, and all paths under it, are all-NULL.
    Missing,
    /// The path element is not present in the `typed_value` column and must be retrieved from the `value`
    /// column instead. The caller should be prepared to handle any value, including the requested
    /// type, an arbitrary "wrong" type, or `Variant::Null`.
    NotShredded,
}
44
45/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
46/// deeper. For a `VariantPathElement::Field`, the step fails if there is no `typed_value` at this
47/// level, or if `typed_value` is not a struct, or if the requested field name does not exist.
48///
49/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible.
50pub(crate) fn follow_shredded_path_element<'a>(
51    shredding_state: &BorrowedShreddingState<'a>,
52    path_element: &VariantPathElement<'_>,
53    cast_options: &CastOptions,
54) -> Result<ShreddedPathStep<'a>> {
55    // If the requested path element is not present in `typed_value`, and `value` is missing, then
56    // we know it does not exist; it, and all paths under it, are all-NULL.
57    let missing_path_step = || match shredding_state.value_field() {
58        Some(_) => ShreddedPathStep::NotShredded,
59        None => ShreddedPathStep::Missing,
60    };
61
62    let Some(typed_value) = shredding_state.typed_value_field() else {
63        return Ok(missing_path_step());
64    };
65
66    match path_element {
67        VariantPathElement::Field { name } => {
68            // Try to step into the requested field name of a struct.
69            // First, try to downcast to StructArray
70            let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
71                // Downcast failure - if strict cast options are enabled, this should be an error
72                if !cast_options.safe {
73                    return Err(ArrowError::CastError(format!(
74                        "Cannot access field '{}' on non-struct type: {}",
75                        name,
76                        typed_value.data_type()
77                    )));
78                }
79                // With safe cast options, return NULL (missing_path_step)
80                return Ok(missing_path_step());
81            };
82
83            // Now try to find the column - missing column in a present struct is just missing data
84            let Some(field) = struct_array.column_by_name(name) else {
85                // Missing column in a present struct is just missing, not wrong - return Ok
86                return Ok(missing_path_step());
87            };
88
89            let struct_array = field.as_struct_opt().ok_or_else(|| {
90                // TODO: Should we blow up? Or just end the traversal and let the normal
91                // variant pathing code sort out the mess that it must anyway be
92                // prepared to handle?
93                ArrowError::InvalidArgumentError(format!(
94                    "Expected Struct array while following path, got {}",
95                    field.data_type(),
96                ))
97            })?;
98
99            let state = BorrowedShreddingState::try_from(struct_array)?;
100            Ok(ShreddedPathStep::Success(state))
101        }
102        VariantPathElement::Index { .. } => {
103            // TODO: Support array indexing. Among other things, it will require slicing not
104            // only the array we have here, but also the corresponding metadata and null masks.
105            Err(ArrowError::NotYetImplemented(
106                "Pathing into shredded variant array index".into(),
107            ))
108        }
109    }
110}
111
112/// Follows the given path as far as possible through shredded variant fields. If the path ends on a
113/// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path
114/// and extract the requested value on a per-row basis.
115fn shredded_get_path(
116    input: &VariantArray,
117    path: &[VariantPathElement<'_>],
118    as_field: Option<&Field>,
119    cast_options: &CastOptions,
120) -> Result<ArrayRef> {
121    // Helper that creates a new VariantArray from the given nested value and typed_value columns,
122    // properly accounting for accumulated nulls from path traversal
123    let make_target_variant =
124        |value: Option<BinaryViewArray>,
125         typed_value: Option<ArrayRef>,
126         accumulated_nulls: Option<arrow::buffer::NullBuffer>| {
127            let metadata = input.metadata_field().clone();
128            VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls)
129        };
130
131    // Helper that shreds a VariantArray to a specific type.
132    let shred_basic_variant =
133        |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
134            let as_type = as_field.map(|f| f.data_type());
135            let mut builder = make_variant_to_arrow_row_builder(
136                target.metadata_field(),
137                path,
138                as_type,
139                cast_options,
140                target.len(),
141            )?;
142            for i in 0..target.len() {
143                if target.is_null(i) {
144                    builder.append_null()?;
145                } else if !cast_options.safe {
146                    let value = target.try_value(i)?;
147                    builder.append_value(value)?;
148                } else {
149                    let _ = match target.try_value(i) {
150                        Ok(v) => builder.append_value(v)?,
151                        Err(_) => {
152                            builder.append_null()?;
153                            false // add this to make match arms have the same return type
154                        }
155                    };
156                }
157            }
158            builder.finish()
159        };
160
161    // Peel away the prefix of path elements that traverses the shredded parts of this variant
162    // column. Shredding will traverse the rest of the path on a per-row basis.
163    let mut shredding_state = input.shredding_state().borrow();
164    let mut accumulated_nulls = input.inner().nulls().cloned();
165    let mut path_index = 0;
166    for path_element in path {
167        match follow_shredded_path_element(&shredding_state, path_element, cast_options)? {
168            ShreddedPathStep::Success(state) => {
169                // Union nulls from the typed_value we just accessed
170                if let Some(typed_value) = shredding_state.typed_value_field() {
171                    accumulated_nulls = arrow::buffer::NullBuffer::union(
172                        accumulated_nulls.as_ref(),
173                        typed_value.nulls(),
174                    );
175                }
176                shredding_state = state;
177                path_index += 1;
178                continue;
179            }
180            ShreddedPathStep::Missing => {
181                let num_rows = input.len();
182                let arr = match as_field.map(|f| f.data_type()) {
183                    Some(data_type) => array::new_null_array(data_type, num_rows),
184                    None => Arc::new(array::NullArray::new(num_rows)) as _,
185                };
186                return Ok(arr);
187            }
188            ShreddedPathStep::NotShredded => {
189                let target = make_target_variant(
190                    shredding_state.value_field().cloned(),
191                    None,
192                    accumulated_nulls,
193                );
194                return shred_basic_variant(target, path[path_index..].into(), as_field);
195            }
196        };
197    }
198
199    // Path exhausted! Create a new `VariantArray` for the location we landed on.
200    let target = make_target_variant(
201        shredding_state.value_field().cloned(),
202        shredding_state.typed_value_field().cloned(),
203        accumulated_nulls,
204    );
205
206    // If our caller did not request any specific type, we can just return whatever we landed on.
207    let Some(as_field) = as_field else {
208        return Ok(ArrayRef::from(target));
209    };
210
211    // Try to return the typed value directly when we have a perfect shredding match.
212    if let Some(shredded) = try_perfect_shredding(&target, as_field) {
213        return Ok(shredded);
214    }
215
216    // Structs are special. Recurse into each field separately, hoping to follow the shredding even
217    // further, and build up the final struct from those individually shredded results.
218    if let DataType::Struct(fields) = as_field.data_type() {
219        let children = fields
220            .iter()
221            .map(|field| {
222                shredded_get_path(
223                    &target,
224                    &[VariantPathElement::from(field.name().as_str())],
225                    Some(field),
226                    cast_options,
227                )
228            })
229            .collect::<Result<Vec<_>>>()?;
230
231        let struct_nulls = target.nulls().cloned();
232
233        return Ok(Arc::new(StructArray::try_new(
234            fields.clone(),
235            children,
236            struct_nulls,
237        )?));
238    }
239
240    // Not a struct, so directly shred the variant as the requested type
241    shred_basic_variant(target, VariantPath::default(), Some(as_field))
242}
243
244fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Option<ArrayRef> {
245    // Try to return the typed value directly when we have a perfect shredding match.
246    if matches!(as_field.data_type(), DataType::Struct(_)) {
247        return None;
248    }
249    let typed_value = variant_array.typed_value_field()?;
250    if typed_value.data_type() == as_field.data_type()
251        && variant_array
252            .value_field()
253            .is_none_or(|v| v.null_count() == v.len())
254    {
255        // Here we need to gate against the case where the `typed_value` is null but data is in the `value` column.
256        // 1. If the `value` column is null, or
257        // 2. If every row in the `value` column is null
258
259        // This is a perfect shredding, where the value is entirely shredded out,
260        // so we can just return the typed value.
261        return Some(typed_value.clone());
262    }
263    None
264}
265
266/// Returns an array with the specified path extracted from the variant values.
267///
268/// The return array type depends on the `as_type` field of the options parameter
269/// 1. `as_type: None`: a VariantArray is returned. The values in this new VariantArray will point
270///    to the specified path.
271/// 2. `as_type: Some(<specific field>)`: an array of the specified type is returned.
272///
273/// TODO: How would a caller request a struct or list type where the fields/elements can be any
274/// variant? Caller can pass None as the requested type to fetch a specific path, but it would
275/// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or
276/// list and then try to assemble the results.
277pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
278    let variant_array = VariantArray::try_new(input)?;
279
280    let GetOptions {
281        as_type,
282        path,
283        cast_options,
284    } = options;
285
286    shredded_get_path(&variant_array, &path, as_type.as_deref(), &cast_options)
287}
288
/// Controls the action of the `variant_get` kernel: which path to extract, which type to
/// return it as, and how to treat values that cannot be cast to that type.
#[derive(Debug, Clone, Default)]
pub struct GetOptions<'a> {
    /// What path to extract
    pub path: VariantPath<'a>,
    /// If `as_type` is `None`, the returned array will itself be a VariantArray.
    ///
    /// If `as_type` is `Some(type)`, the field is returned as the specified type.
    pub as_type: Option<FieldRef>,
    /// Controls the casting behavior (e.g. error vs substituting null on cast error).
    pub cast_options: CastOptions<'a>,
}
301
302impl<'a> GetOptions<'a> {
303    /// Construct default options to get the specified path as a variant.
304    pub fn new() -> Self {
305        Default::default()
306    }
307
308    /// Construct options to get the specified path as a variant.
309    pub fn new_with_path(path: VariantPath<'a>) -> Self {
310        Self {
311            path,
312            as_type: None,
313            cast_options: Default::default(),
314        }
315    }
316
317    /// Specify the type to return.
318    pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
319        self.as_type = as_type;
320        self
321    }
322
323    /// Specify the cast options to use when casting to the specified type.
324    pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
325        self.cast_options = cast_options;
326        self
327    }
328}
329
330#[cfg(test)]
331mod test {
332    use std::str::FromStr;
333    use std::sync::Arc;
334
335    use super::{GetOptions, variant_get};
336    use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
337    use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
338    use arrow::array::{
339        Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
340        Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
341        Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
342        LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray,
343        ListViewArray, NullBuilder, StringArray, StringViewArray, StructArray,
344        Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
345    };
346    use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
347    use arrow::compute::CastOptions;
348    use arrow::datatypes::DataType::{Int16, Int32, Int64};
349    use arrow::datatypes::i256;
350    use arrow::util::display::FormatOptions;
351    use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
352    use arrow_schema::{DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit};
353    use chrono::DateTime;
354    use parquet_variant::{
355        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8,
356        VariantDecimal16, VariantDecimalType, VariantPath,
357    };
358
359    fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
360        // Create input array from JSON string
361        let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
362        let input_variant_array_ref = ArrayRef::from(json_to_variant(&input_array_ref).unwrap());
363
364        let result =
365            variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
366
367        // Create expected array from JSON string
368        let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
369        let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();
370
371        let result_array = VariantArray::try_new(&result).unwrap();
372        assert_eq!(
373            result_array.len(),
374            1,
375            "Expected result array to have length 1"
376        );
377        assert!(
378            result_array.nulls().is_none(),
379            "Expected no nulls in result array"
380        );
381        let result_variant = result_array.value(0);
382        let expected_variant = expected_variant_array.value(0);
383        assert_eq!(
384            result_variant, expected_variant,
385            "Result variant does not match expected variant"
386        );
387    }
388
389    #[test]
390    fn get_primitive_variant_field() {
391        single_variant_get_test(
392            r#"{"some_field": 1234}"#,
393            VariantPath::from("some_field"),
394            "1234",
395        );
396    }
397
398    #[test]
399    fn get_primitive_variant_list_index() {
400        single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
401    }
402
403    #[test]
404    fn get_primitive_variant_inside_object_of_object() {
405        single_variant_get_test(
406            r#"{"top_level_field": {"inner_field": 1234}}"#,
407            VariantPath::from("top_level_field").join("inner_field"),
408            "1234",
409        );
410    }
411
412    #[test]
413    fn get_primitive_variant_inside_list_of_object() {
414        single_variant_get_test(
415            r#"[{"some_field": 1234}]"#,
416            VariantPath::from(0).join("some_field"),
417            "1234",
418        );
419    }
420
421    #[test]
422    fn get_primitive_variant_inside_object_of_list() {
423        single_variant_get_test(
424            r#"{"some_field": [1234]}"#,
425            VariantPath::from("some_field").join(0),
426            "1234",
427        );
428    }
429
430    #[test]
431    fn get_complex_variant() {
432        single_variant_get_test(
433            r#"{"top_level_field": {"inner_field": 1234}}"#,
434            VariantPath::from("top_level_field"),
435            r#"{"inner_field": 1234}"#,
436        );
437    }
438
439    /// Partial Shredding: extract a value as a VariantArray
440    macro_rules! numeric_partially_shredded_test {
441        ($primitive_type:ty, $data_fn:ident) => {
442            let array = $data_fn();
443            let options = GetOptions::new();
444            let result = variant_get(&array, options).unwrap();
445
446            // expect the result is a VariantArray
447            let result = VariantArray::try_new(&result).unwrap();
448            assert_eq!(result.len(), 4);
449
450            // Expect the values are the same as the original values
451            assert_eq!(
452                result.value(0),
453                Variant::from(<$primitive_type>::try_from(34u8).unwrap())
454            );
455            assert!(!result.is_valid(1));
456            assert_eq!(result.value(2), Variant::from("n/a"));
457            assert_eq!(
458                result.value(3),
459                Variant::from(<$primitive_type>::try_from(100u8).unwrap())
460            );
461        };
462    }
463
    /// Defines a function `$func_name() -> ArrayRef` that builds a 4-row partially shredded
    /// variant array with `metadata`, `typed_value` and `value` columns:
    /// rows 0 and 3 carry no `value`, row 1 is null at the top level, and row 2 holds the
    /// string "n/a" in `value`. The `typed_value` column is produced by calling
    /// `$typed_value_array_gen`.
    macro_rules! partially_shredded_variant_array_gen {
        ($func_name:ident,  $typed_value_array_gen: expr) => {
            fn $func_name() -> ArrayRef {
                // Encode the fallback string once; its metadata is shared by all rows.
                let (metadata, string_value) = {
                    let mut builder = parquet_variant::VariantBuilder::new();
                    builder.append_value("n/a");
                    builder.finish()
                };

                let nulls = NullBuffer::from(vec![
                    true,  // row 0 non null
                    false, // row 1 is null
                    true,  // row 2 non null
                    true,  // row 3 non null
                ]);

                // metadata is the same for all rows
                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));

                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
                // for why row 1 is an empty but non-null value.
                let values = BinaryViewArray::from(vec![
                    None,                // row 0 is shredded, so no value
                    Some(b"" as &[u8]),  // row 1 is null, so empty value (see the link above)
                    Some(&string_value), // row 2 holds the encoded string value "n/a"
                    None,                // row 3 is shredded, so no value
                ]);

                let typed_value = $typed_value_array_gen();

                // Assemble the struct and wrap it as a VariantArray.
                let struct_array = StructArrayBuilder::new()
                    .with_field("metadata", Arc::new(metadata), false)
                    .with_field("typed_value", Arc::new(typed_value), true)
                    .with_field("value", Arc::new(values), true)
                    .with_nulls(nulls)
                    .build();
                ArrayRef::from(
                    VariantArray::try_new(&struct_array).expect("should create variant array"),
                )
            }
        };
    }
506
507    #[test]
508    fn get_variant_partially_shredded_int8_as_variant() {
509        numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
510    }
511
512    #[test]
513    fn get_variant_partially_shredded_int16_as_variant() {
514        numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
515    }
516
517    #[test]
518    fn get_variant_partially_shredded_int32_as_variant() {
519        numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
520    }
521
522    #[test]
523    fn get_variant_partially_shredded_int64_as_variant() {
524        numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
525    }
526
527    #[test]
528    fn get_variant_partially_shredded_float32_as_variant() {
529        numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
530    }
531
532    #[test]
533    fn get_variant_partially_shredded_float64_as_variant() {
534        numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
535    }
536
537    #[test]
538    fn get_variant_partially_shredded_bool_as_variant() {
539        let array = partially_shredded_bool_variant_array();
540        let options = GetOptions::new();
541        let result = variant_get(&array, options).unwrap();
542
543        // expect the result is a VariantArray
544        let result = VariantArray::try_new(&result).unwrap();
545        assert_eq!(result.len(), 4);
546
547        // Expect the values are the same as the original values
548        assert_eq!(result.value(0), Variant::from(true));
549        assert!(!result.is_valid(1));
550        assert_eq!(result.value(2), Variant::from("n/a"));
551        assert_eq!(result.value(3), Variant::from(false));
552    }
553
554    #[test]
555    fn get_variant_partially_shredded_utf8_as_variant() {
556        let array = partially_shredded_utf8_variant_array();
557        let options = GetOptions::new();
558        let result = variant_get(&array, options).unwrap();
559
560        // expect the result is a VariantArray
561        let result = VariantArray::try_new(&result).unwrap();
562        assert_eq!(result.len(), 4);
563
564        // Expect the values are the same as the original values
565        assert_eq!(result.value(0), Variant::from("hello"));
566        assert!(!result.is_valid(1));
567        assert_eq!(result.value(2), Variant::from("n/a"));
568        assert_eq!(result.value(3), Variant::from("world"));
569    }
570
571    partially_shredded_variant_array_gen!(partially_shredded_binary_view_variant_array, || {
572        BinaryViewArray::from(vec![
573            Some(&[1u8, 2u8, 3u8][..]), // row 0 is shredded
574            None,                       // row 1 is null
575            None,                       // row 2 is a string
576            Some(&[4u8, 5u8, 6u8][..]), // row 3 is shredded
577        ])
578    });
579
580    #[test]
581    fn get_variant_partially_shredded_date32_as_variant() {
582        let array = partially_shredded_date32_variant_array();
583        let options = GetOptions::new();
584        let result = variant_get(&array, options).unwrap();
585
586        // expect the result is a VariantArray
587        let result = VariantArray::try_new(&result).unwrap();
588        assert_eq!(result.len(), 4);
589
590        // Expect the values are the same as the original values
591        use chrono::NaiveDate;
592        let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap();
593        let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap();
594        assert_eq!(result.value(0), Variant::from(date1));
595        assert!(!result.is_valid(1));
596        assert_eq!(result.value(2), Variant::from("n/a"));
597        assert_eq!(result.value(3), Variant::from(date2));
598    }
599
600    #[test]
601    fn get_variant_partially_shredded_binary_view_as_variant() {
602        let array = partially_shredded_binary_view_variant_array();
603        let options = GetOptions::new();
604        let result = variant_get(&array, options).unwrap();
605
606        // expect the result is a VariantArray
607        let result = VariantArray::try_new(&result).unwrap();
608        assert_eq!(result.len(), 4);
609
610        // Expect the values are the same as the original values
611        assert_eq!(result.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
612        assert!(!result.is_valid(1));
613        assert_eq!(result.value(2), Variant::from("n/a"));
614        assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
615    }
616
617    /// Shredding: extract a value as an Int32Array
618    #[test]
619    fn get_variant_shredded_int32_as_int32_safe_cast() {
620        // Extract the typed value as Int32Array
621        let array = partially_shredded_int32_variant_array();
622        // specify we want the typed value as Int32
623        let field = Field::new("typed_value", DataType::Int32, true);
624        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
625        let result = variant_get(&array, options).unwrap();
626        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
627            Some(34),
628            None,
629            None, // "n/a" is not an Int32 so converted to null
630            Some(100),
631        ]));
632        assert_eq!(&result, &expected)
633    }
634
635    /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a")
636    #[test]
637    fn get_variant_shredded_int32_as_int32_unsafe_cast() {
638        // Extract the typed value as Int32Array
639        let array = partially_shredded_int32_variant_array();
640        let field = Field::new("typed_value", DataType::Int32, true);
641        let cast_options = CastOptions {
642            safe: false, // unsafe cast
643            ..Default::default()
644        };
645        let options = GetOptions::new()
646            .with_as_type(Some(FieldRef::from(field)))
647            .with_cast_options(cast_options);
648
649        let err = variant_get(&array, options).unwrap_err();
650        // TODO make this error message nicer (not Debug format)
651        assert_eq!(
652            err.to_string(),
653            "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
654        );
655    }
656
657    /// Perfect Shredding: extract the typed value as a VariantArray
658    macro_rules! numeric_perfectly_shredded_test {
659        ($primitive_type:ty, $data_fn:ident) => {
660            let array = $data_fn();
661            let options = GetOptions::new();
662            let result = variant_get(&array, options).unwrap();
663
664            // expect the result is a VariantArray
665            let result = VariantArray::try_new(&result).unwrap();
666            assert_eq!(result.len(), 3);
667
668            // Expect the values are the same as the original values
669            assert_eq!(
670                result.value(0),
671                Variant::from(<$primitive_type>::try_from(1u8).unwrap())
672            );
673            assert_eq!(
674                result.value(1),
675                Variant::from(<$primitive_type>::try_from(2u8).unwrap())
676            );
677            assert_eq!(
678                result.value(2),
679                Variant::from(<$primitive_type>::try_from(3u8).unwrap())
680            );
681        };
682    }
683
684    #[test]
685    fn get_variant_perfectly_shredded_int8_as_variant() {
686        numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
687    }
688
689    #[test]
690    fn get_variant_perfectly_shredded_int16_as_variant() {
691        numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
692    }
693
694    #[test]
695    fn get_variant_perfectly_shredded_int32_as_variant() {
696        numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
697    }
698
699    #[test]
700    fn get_variant_perfectly_shredded_int64_as_variant() {
701        numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
702    }
703
704    #[test]
705    fn get_variant_perfectly_shredded_float32_as_variant() {
706        numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
707    }
708
709    #[test]
710    fn get_variant_perfectly_shredded_float64_as_variant() {
711        numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
712    }
713
714    /// AllNull: extract a value as a VariantArray
715    #[test]
716    fn get_variant_all_null_as_variant() {
717        let array = all_null_variant_array();
718        let options = GetOptions::new();
719        let result = variant_get(&array, options).unwrap();
720
721        // expect the result is a VariantArray
722        let result = VariantArray::try_new(&result).unwrap();
723        assert_eq!(result.len(), 3);
724
725        // All values should be null
726        assert!(!result.is_valid(0));
727        assert!(!result.is_valid(1));
728        assert!(!result.is_valid(2));
729    }
730
731    /// AllNull: extract a value as an Int32Array
732    #[test]
733    fn get_variant_all_null_as_int32() {
734        let array = all_null_variant_array();
735        // specify we want the typed value as Int32
736        let field = Field::new("typed_value", DataType::Int32, true);
737        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
738        let result = variant_get(&array, options).unwrap();
739
740        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
741            Option::<i32>::None,
742            Option::<i32>::None,
743            Option::<i32>::None,
744        ]));
745        assert_eq!(&result, &expected)
746    }
747
748    macro_rules! perfectly_shredded_to_arrow_primitive_test {
749        ($name:ident, $primitive_type:expr, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
750            #[test]
751            fn $name() {
752                let array = $perfectly_shredded_array_gen_fun();
753                let field = Field::new("typed_value", $primitive_type, true);
754                let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
755                let result = variant_get(&array, options).unwrap();
756                let expected_array: ArrayRef = Arc::new($expected_array);
757                assert_eq!(&result, &expected_array);
758            }
759        };
760    }
761
762    perfectly_shredded_to_arrow_primitive_test!(
763        get_variant_perfectly_shredded_int18_as_int8,
764        Int8,
765        perfectly_shredded_int8_variant_array,
766        Int8Array::from(vec![Some(1), Some(2), Some(3)])
767    );
768
    // Each test below requests the same Arrow type that the input was shredded as and
    // expects the values to come back unchanged.

    // Int16 -> Int16
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int16_as_int16,
        Int16,
        perfectly_shredded_int16_variant_array,
        Int16Array::from(vec![Some(1), Some(2), Some(3)])
    );

    // Int32 -> Int32
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int32_as_int32,
        Int32,
        perfectly_shredded_int32_variant_array,
        Int32Array::from(vec![Some(1), Some(2), Some(3)])
    );

    // Int64 -> Int64
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int64_as_int64,
        Int64,
        perfectly_shredded_int64_variant_array,
        Int64Array::from(vec![Some(1), Some(2), Some(3)])
    );

    // Float32 -> Float32
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_float32_as_float32,
        Float32,
        perfectly_shredded_float32_variant_array,
        Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
    );

    // Float64 -> Float64
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_float64_as_float64,
        Float64,
        perfectly_shredded_float64_variant_array,
        Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
    );

    // Boolean -> Boolean
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_boolean_as_boolean,
        Boolean,
        perfectly_shredded_bool_variant_array,
        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
    );
810
    // The three string tests below all request `DataType::Utf8` and expect a
    // `StringArray`, regardless of which string layout the input was shredded as.

    // Utf8 (StringArray) input
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_utf8_as_utf8,
        DataType::Utf8,
        perfectly_shredded_utf8_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );

    // LargeUtf8 (LargeStringArray) input
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_large_utf8_as_utf8,
        DataType::Utf8,
        perfectly_shredded_large_utf8_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );

    // Utf8View (StringViewArray) input
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_utf8_view_as_utf8,
        DataType::Utf8,
        perfectly_shredded_utf8_view_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );
831
832    macro_rules! perfectly_shredded_variant_array_fn {
833        ($func:ident, $typed_value_gen:expr) => {
834            fn $func() -> ArrayRef {
835                // At the time of writing, the `VariantArrayBuilder` does not support shredding.
836                // so we must construct the array manually.  see https://github.com/apache/arrow-rs/issues/7895
837                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
838                    EMPTY_VARIANT_METADATA_BYTES,
839                    3,
840                ));
841                let typed_value = $typed_value_gen();
842
843                let struct_array = StructArrayBuilder::new()
844                    .with_field("metadata", Arc::new(metadata), false)
845                    .with_field("typed_value", Arc::new(typed_value), true)
846                    .build();
847
848                VariantArray::try_new(&struct_array)
849                    .expect("should create variant array")
850                    .into()
851            }
852        };
853    }
854
    // Input generator: typed_value is a Utf8 `StringArray` of "foo", "bar", "baz".
    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_variant_array, || {
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    // Input generator: same strings, stored as a `LargeStringArray`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_utf8_variant_array, || {
        LargeStringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    // Input generator: same strings, stored as a `StringViewArray`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_view_variant_array, || {
        StringViewArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    // Input generator: typed_value is a `BooleanArray` of true, false, true.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
    });
870
871    /// Return a VariantArray that represents a perfectly "shredded" variant
872    /// for the given typed value.
873    ///
874    /// The schema of the corresponding `StructArray` would look like this:
875    ///
876    /// ```text
877    /// StructArray {
878    ///   metadata: BinaryViewArray,
879    ///   typed_value: Int32Array,
880    /// }
881    /// ```
882    macro_rules! numeric_perfectly_shredded_variant_array_fn {
883        ($func:ident, $array_type:ident, $primitive_type:ty) => {
884            perfectly_shredded_variant_array_fn!($func, || {
885                $array_type::from(vec![
886                    Some(<$primitive_type>::try_from(1u8).unwrap()),
887                    Some(<$primitive_type>::try_from(2u8).unwrap()),
888                    Some(<$primitive_type>::try_from(3u8).unwrap()),
889                ])
890            });
891        };
892    }
893
    // Input generators for each numeric type; every one holds the values 1, 2, 3.

    // Int8
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    // Int16
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    // Int32
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    // Int64
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    // Float32
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    // Float64
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float64_variant_array,
        Float64Array,
        f64
    );
924
    // Input generator: microsecond timestamps without a timezone (NTZ), including one
    // negative (pre-epoch) value.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(-456000),
                Some(1758602096000001),
                Some(1758602096000002),
            ])
        }
    );

    // Requesting the shredded type itself returns the values unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_micro_ntz,
        DataType::Timestamp(TimeUnit::Microsecond, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
    );

    // Converting micro to nano: each expected value is the input multiplied by 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-456000000),
            Some(1758602096000001000),
            Some(1758602096000002000)
        ])
    );
958
    // Input generator: the same microsecond timestamps, tagged with the "+00:00" timezone.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
        .with_timezone("+00:00")
    });

    // Requesting the shredded type itself (same unit, same timezone) returns the values
    // unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_micro,
        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array,
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
        .with_timezone("+00:00")
    );

    // Converting micro to nano: each expected value is the input multiplied by 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_nano,
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-456000000),
            Some(1758602096000001000),
            Some(1758602096000002000)
        ])
        .with_timezone("+00:00")
    );
992
    // Input generator: nanosecond timestamps without a timezone (NTZ), including one
    // negative (pre-epoch) value.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_nano_ntz_variant_array,
        || {
            arrow::array::TimestampNanosecondArray::from(vec![
                Some(-4999999561),
                Some(1758602096000000001),
                Some(1758602096000000002),
            ])
        }
    );
1003
    // Input generator: microsecond timestamps (timezone "+00:00") chosen so that each
    // row is exactly representable at a different coarser unit.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(1234),       // can't be cast to second & millisecond
                Some(1234000),    // can be cast to millisecond, but not second
                Some(1234000000), // can be cast to second & millisecond
            ])
            .with_timezone("+00:00")
        }
    );

    // The following two tests cover the micro-with-timezone -> milli/second cases.
    // The three inputs mix values that can be cast safely with values that cannot.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_second,
        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
        arrow::array::TimestampSecondArray::from(vec![
            None,
            None, // Return None if can't be cast to second safely
            Some(1234)
        ])
        .with_timezone("+00:00")
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_milli,
        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
        arrow::array::TimestampMillisecondArray::from(vec![
            None, // Return None if can't be cast to millisecond safely
            Some(1234),
            Some(1234000)
        ])
        .with_timezone("+00:00")
    );
1041
    // Input generator: microsecond timestamps without a timezone, chosen so that each
    // row is exactly representable at a different coarser unit.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(1234),       // can't be cast to second & millisecond
                Some(1234000),    // can be cast to millisecond, but not second
                Some(1234000000), // can be cast to second & millisecond
            ])
        }
    );

    // The following two tests cover the micro_ntz -> milli/second cases.
    // The three inputs mix values that can be cast safely with values that cannot.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_second,
        DataType::Timestamp(TimeUnit::Second, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
        arrow::array::TimestampSecondArray::from(vec![
            None,
            None, // Return None if can't be cast to second safely
            Some(1234)
        ])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_milli,
        DataType::Timestamp(TimeUnit::Millisecond, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
        arrow::array::TimestampMillisecondArray::from(vec![
            None, // Return None if can't be cast to millisecond safely
            Some(1234),
            Some(1234000)
        ])
    );
1076
    // Input generator: nanosecond timestamps (timezone "+00:00") chosen so that each
    // row is exactly representable at a different coarser unit.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
        || {
            arrow::array::TimestampNanosecondArray::from(vec![
                Some(1234000),       // can't be cast to second & millisecond
                Some(1234000000),    // can be cast to millisecond, but not second
                Some(1234000000000), // can be cast to second & millisecond
            ])
            .with_timezone("+00:00")
        }
    );

    // The following two tests cover the nano-with-timezone -> milli/second cases.
    // The three inputs mix values that can be cast safely with values that cannot.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_second,
        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
        arrow::array::TimestampSecondArray::from(vec![
            None,
            None, // Return None if can't be cast to second safely
            Some(1234)
        ])
        .with_timezone("+00:00")
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_milli,
        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
        arrow::array::TimestampMillisecondArray::from(vec![
            None, // Return None if can't be cast to millisecond safely
            Some(1234),
            Some(1234000)
        ])
        .with_timezone("+00:00")
    );
1114
    // Input generator: nanosecond timestamps without a timezone, chosen so that each
    // row is exactly representable at a different coarser unit.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
        || {
            arrow::array::TimestampNanosecondArray::from(vec![
                Some(1234000),       // can't be cast to second & millisecond
                Some(1234000000),    // can be cast to millisecond, but not second
                Some(1234000000000), // can be cast to second & millisecond
            ])
        }
    );

    // The following two tests cover the nano_ntz -> milli/second cases.
    // The three inputs mix values that can be cast safely with values that cannot.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_second,
        DataType::Timestamp(TimeUnit::Second, None),
        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
        arrow::array::TimestampSecondArray::from(vec![
            None,
            None, // Return None if can't be cast to second safely
            Some(1234)
        ])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_milli,
        DataType::Timestamp(TimeUnit::Millisecond, None),
        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
        arrow::array::TimestampMillisecondArray::from(vec![
            None, // Return None if can't be cast to millisecond safely
            Some(1234),
            Some(1234000)
        ])
    );
1149
    // Nanosecond NTZ timestamps requested as nanosecond NTZ come back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        perfectly_shredded_timestamp_nano_ntz_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
    );

    // Input generator: the same nanosecond timestamps, tagged with the "+00:00" timezone.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_nano_variant_array, || {
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    });

    // Nanosecond timestamps with a timezone requested as the same type come back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_nano,
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_nano_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    );
1181
    // Input generator: Date32 values (days), including one negative (pre-epoch) value.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_date_variant_array, || {
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    });

    // Date32 requested as Date32 comes back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_date_as_date,
        DataType::Date32,
        perfectly_shredded_date_variant_array,
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    );

    // Date32 requested as Date64: each expected value is the day count multiplied by
    // 86_400_000 (milliseconds per day).
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_date_as_date64,
        DataType::Date64,
        perfectly_shredded_date_variant_array,
        Date64Array::from(vec![
            Some(-1066608000000),
            Some(1519430400000),
            Some(1728000000000)
        ])
    );
1203
    // Input generator: Time64 values in microseconds.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    });

    // Time64(Microsecond) requested as the same type comes back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time,
        DataType::Time64(TimeUnit::Microsecond),
        perfectly_shredded_time_variant_array,
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    );

    // Converting micro to nano: each expected value is the input multiplied by 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time64_nano,
        DataType::Time64(TimeUnit::Nanosecond),
        perfectly_shredded_time_variant_array,
        Time64NanosecondArray::from(vec![
            Some(12345000000),
            Some(87654000000),
            Some(135792000000)
        ])
    );
1225
    // Input generator: Time64 microsecond values chosen so that each row is exactly
    // representable at a different Time32 unit.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array_for_time32, || {
        Time64MicrosecondArray::from(vec![
            Some(1234),        // This can't be cast to Time32 losslessly
            Some(7654000),     // This can be cast to Time32(Millisecond), but not Time32(Second)
            Some(35792000000), // This can be cast to Time32(Second) & Time32(Millisecond)
        ])
    });

    // Only the last row is a whole number of seconds.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time32_second,
        DataType::Time32(TimeUnit::Second),
        perfectly_shredded_time_variant_array_for_time32,
        Time32SecondArray::from(vec![
            None,
            None, // Return None if can't be cast to Time32(Second) safely
            Some(35792)
        ])
    );

    // The last two rows are whole numbers of milliseconds.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time32_milli,
        DataType::Time32(TimeUnit::Millisecond),
        perfectly_shredded_time_variant_array_for_time32,
        Time32MillisecondArray::from(vec![
            None, // Return None if can't be cast to Time32(Millisecond) safely
            Some(7654),
            Some(35792000)
        ])
    );
1255
    // Input generator: typed_value is a NullArray of length 3.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
        let mut builder = NullBuilder::new();
        builder.append_nulls(3);
        builder.finish()
    });

    // A Null typed_value requested as Null yields a NullArray of the same length.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_as_null,
        DataType::Null,
        perfectly_shredded_null_variant_array,
        arrow::array::NullArray::new(3)
    );

    // Input generator: despite the "null" in its name, typed_value holds Int32 values;
    // it is used to provoke a type mismatch when Null is requested.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array_with_int, || {
        Int32Array::from(vec![Some(32), Some(64), Some(48)])
    });

    // Null values are appended when a type mismatch happens in safe mode
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_with_type_missmatch_in_safe_mode,
        DataType::Null,
        perfectly_shredded_null_variant_array_with_int,
        arrow::array::NullArray::new(3)
    );
1280
1281    // We'll return an error if type miss match happens in strict mode
1282    #[test]
1283    fn get_variant_perfectly_shredded_null_as_null_with_type_missmatch_in_strict_mode() {
1284        let array = perfectly_shredded_null_variant_array_with_int();
1285        let field = Field::new("typed_value", DataType::Null, true);
1286        let options = GetOptions::new()
1287            .with_as_type(Some(FieldRef::from(field)))
1288            .with_cast_options(CastOptions {
1289                safe: false,
1290                format_options: FormatOptions::default(),
1291            });
1292
1293        let result = variant_get(&array, options);
1294
1295        assert!(result.is_err());
1296        let error_msg = format!("{}", result.unwrap_err());
1297        assert!(
1298            error_msg
1299                .contains("Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])"),
1300            "Expected=[Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])],\
1301                Got error message=[{}]",
1302            error_msg
1303        );
1304    }
1305
    // Input generator: Decimal32 values with precision 5 and scale 2.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal4_variant_array, || {
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    });

    // Decimal32(5, 2) requested as the same type comes back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal4_as_decimal4,
        DataType::Decimal32(5, 2),
        perfectly_shredded_decimal4_variant_array,
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    );
1320
    // Input generator: Decimal64 values with precision 6 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        || {
            Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
                .with_precision_and_scale(6, 1)
                .unwrap()
        }
    );

    // The input will be cast to Decimal32 when transformed to Variant.
    // This test covers the path DataType::Decimal64 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal64 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_through_decimal32_as_decimal8,
        DataType::Decimal64(6, 1),
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
            .with_precision_and_scale(6, 1)
            .unwrap()
    );
1341
    // This test covers the path DataType::Decimal64 (the original array)
    //  -> Variant::Decimal8 (VariantArray) -> DataType::Decimal64 (the result array)
    // Input generator: Decimal64 values with precision 10 and scale 1.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal8_variant_array, || {
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    });

    // Decimal64(10, 1) requested as the same type comes back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_as_decimal8,
        DataType::Decimal64(10, 1),
        perfectly_shredded_decimal8_variant_array,
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );
1358
    // Input generator for the Decimal128 -> Variant::Decimal4 -> Decimal128 test below:
    // Decimal128 values with precision 7 and scale 3.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        || {
            Decimal128Array::from(vec![
                Some(i128::from(1234589)),
                Some(i128::from(2344444)),
                Some(i128::from(-1234789)),
            ])
            .with_precision_and_scale(7, 3)
            .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within_decimal4_as_decimal16,
        DataType::Decimal128(7, 3),
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from(1234589)),
            Some(i128::from(2344444)),
            Some(i128::from(-1234789)),
        ])
        .with_precision_and_scale(7, 3)
        .unwrap()
    );
1388
    // Input generator: Decimal128 values with precision 10 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        || {
            Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
                .with_precision_and_scale(10, 1)
                .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal8 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within8_as_decimal16,
        DataType::Decimal128(10, 1),
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );
1408
    // Input generator: Decimal128 values with precision 20 and scale 3, parsed from
    // 20-digit strings.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal16_variant_array, || {
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap()),
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    });

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal16 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_as_decimal16,
        DataType::Decimal128(20, 3),
        perfectly_shredded_decimal16_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap())
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    );
1433
1434    macro_rules! assert_variant_get_as_variant_array_with_default_option {
1435        ($variant_array: expr, $array_expected: expr) => {{
1436            let options = GetOptions::new();
1437            let array = $variant_array;
1438            let result = variant_get(&array, options).unwrap();
1439
1440            // expect the result is a VariantArray
1441            let result = VariantArray::try_new(&result).unwrap();
1442
1443            assert_eq!(result.len(), $array_expected.len());
1444
1445            for (idx, item) in $array_expected.into_iter().enumerate() {
1446                match item {
1447                    Some(item) => assert_eq!(result.value(idx), item),
1448                    None => assert!(result.is_null(idx)),
1449                }
1450            }
1451        }};
1452    }
1453
    // Generator for a partially shredded input whose typed_value column holds
    // microsecond NTZ timestamps in rows 0 and 3 and is null in rows 1 and 2.
    // NOTE(review): `partially_shredded_variant_array_gen!` is defined outside this
    // section; presumably it supplies the remaining columns for the null rows - confirm.
    partially_shredded_variant_array_gen!(
        partially_shredded_timestamp_micro_ntz_variant_array,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(-456000),
                None,
                None,
                Some(1758602096000000),
            ])
        }
    );
1465
1466    #[test]
1467    fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
1468        let array = partially_shredded_timestamp_micro_ntz_variant_array();
1469        assert_variant_get_as_variant_array_with_default_option!(
1470            array,
1471            vec![
1472                Some(Variant::from(
1473                    DateTime::from_timestamp_micros(-456000i64)
1474                        .unwrap()
1475                        .naive_utc(),
1476                )),
1477                None,
1478                Some(Variant::from("n/a")),
1479                Some(Variant::from(
1480                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1481                        .unwrap()
1482                        .naive_utc(),
1483                )),
1484            ]
1485        )
1486    }
1487
    // Generator for a partially shredded input whose typed_value column holds
    // microsecond timestamps (timezone "+00:00") in rows 0 and 3 and is null in rows
    // 1 and 2.
    // NOTE(review): `partially_shredded_variant_array_gen!` is defined outside this
    // section; presumably it supplies the remaining columns for the null rows - confirm.
    partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            None,
            None,
            Some(1758602096000000),
        ])
        .with_timezone("+00:00")
    });
1497
1498    #[test]
1499    fn get_variant_partial_shredded_timestamp_micro_as_variant() {
1500        let array = partially_shredded_timestamp_micro_variant_array();
1501        assert_variant_get_as_variant_array_with_default_option!(
1502            array,
1503            vec![
1504                Some(Variant::from(
1505                    DateTime::from_timestamp_micros(-456000i64)
1506                        .unwrap()
1507                        .to_utc(),
1508                )),
1509                None,
1510                Some(Variant::from("n/a")),
1511                Some(Variant::from(
1512                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1513                        .unwrap()
1514                        .to_utc(),
1515                )),
1516            ]
1517        )
1518    }
1519
1520    partially_shredded_variant_array_gen!(
1521        partially_shredded_timestamp_nano_ntz_variant_array,
1522        || {
1523            arrow::array::TimestampNanosecondArray::from(vec![
1524                Some(-4999999561),
1525                None,
1526                None,
1527                Some(1758602096000000000),
1528            ])
1529        }
1530    );
1531
1532    #[test]
1533    fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
1534        let array = partially_shredded_timestamp_nano_ntz_variant_array();
1535
1536        assert_variant_get_as_variant_array_with_default_option!(
1537            array,
1538            vec![
1539                Some(Variant::from(
1540                    DateTime::from_timestamp(-5, 439).unwrap().naive_utc()
1541                )),
1542                None,
1543                Some(Variant::from("n/a")),
1544                Some(Variant::from(
1545                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1546                        .unwrap()
1547                        .naive_utc()
1548                )),
1549            ]
1550        )
1551    }
1552
1553    partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
1554        arrow::array::TimestampNanosecondArray::from(vec![
1555            Some(-4999999561),
1556            None,
1557            None,
1558            Some(1758602096000000000),
1559        ])
1560        .with_timezone("+00:00")
1561    });
1562
1563    #[test]
1564    fn get_variant_partial_shredded_timestamp_nano_as_variant() {
1565        let array = partially_shredded_timestamp_nano_variant_array();
1566
1567        assert_variant_get_as_variant_array_with_default_option!(
1568            array,
1569            vec![
1570                Some(Variant::from(
1571                    DateTime::from_timestamp(-5, 439).unwrap().to_utc()
1572                )),
1573                None,
1574                Some(Variant::from("n/a")),
1575                Some(Variant::from(
1576                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1577                        .unwrap()
1578                        .to_utc()
1579                )),
1580            ]
1581        )
1582    }
1583
1584    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || {
1585        BinaryArray::from(vec![
1586            Some(b"Apache" as &[u8]),
1587            Some(b"Arrow-rs" as &[u8]),
1588            Some(b"Parquet-variant" as &[u8]),
1589        ])
1590    });
1591
1592    perfectly_shredded_to_arrow_primitive_test!(
1593        get_variant_perfectly_shredded_binary_as_binary,
1594        DataType::Binary,
1595        perfectly_shredded_binary_variant_array,
1596        BinaryArray::from(vec![
1597            Some(b"Apache" as &[u8]),
1598            Some(b"Arrow-rs" as &[u8]),
1599            Some(b"Parquet-variant" as &[u8]),
1600        ])
1601    );
1602
1603    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_binary_variant_array, || {
1604        LargeBinaryArray::from(vec![
1605            Some(b"Apache" as &[u8]),
1606            Some(b"Arrow-rs" as &[u8]),
1607            Some(b"Parquet-variant" as &[u8]),
1608        ])
1609    });
1610
1611    perfectly_shredded_to_arrow_primitive_test!(
1612        get_variant_perfectly_shredded_large_binary_as_large_binary,
1613        DataType::LargeBinary,
1614        perfectly_shredded_large_binary_variant_array,
1615        LargeBinaryArray::from(vec![
1616            Some(b"Apache" as &[u8]),
1617            Some(b"Arrow-rs" as &[u8]),
1618            Some(b"Parquet-variant" as &[u8]),
1619        ])
1620    );
1621
1622    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_view_variant_array, || {
1623        BinaryViewArray::from(vec![
1624            Some(b"Apache" as &[u8]),
1625            Some(b"Arrow-rs" as &[u8]),
1626            Some(b"Parquet-variant" as &[u8]),
1627        ])
1628    });
1629
1630    perfectly_shredded_to_arrow_primitive_test!(
1631        get_variant_perfectly_shredded_binary_view_as_binary_view,
1632        DataType::BinaryView,
1633        perfectly_shredded_binary_view_variant_array,
1634        BinaryViewArray::from(vec![
1635            Some(b"Apache" as &[u8]),
1636            Some(b"Arrow-rs" as &[u8]),
1637            Some(b"Parquet-variant" as &[u8]),
1638        ])
1639    );
1640
1641    /// Return a VariantArray that represents a normal "shredded" variant
1642    /// for the following example
1643    ///
1644    /// Based on the example from [the doc]
1645    ///
1646    /// [the doc]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?tab=t.0
1647    ///
1648    /// ```text
1649    /// 34
1650    /// null (an Arrow NULL, not a Variant::Null)
1651    /// "n/a" (a string)
1652    /// 100
1653    /// ```
1654    ///
1655    /// The schema of the corresponding `StructArray` would look like this:
1656    ///
1657    /// ```text
1658    /// StructArray {
1659    ///   metadata: BinaryViewArray,
1660    ///   value: BinaryViewArray,
1661    ///   typed_value: Int32Array,
1662    /// }
1663    /// ```
1664    macro_rules! numeric_partially_shredded_variant_array_fn {
1665        ($func:ident, $array_type:ident, $primitive_type:ty) => {
1666            partially_shredded_variant_array_gen!($func, || $array_type::from(vec![
1667                Some(<$primitive_type>::try_from(34u8).unwrap()), // row 0 is shredded, so it has a value
1668                None,                                             // row 1 is null, so no value
1669                None, // row 2 is a string, so no typed value
1670                Some(<$primitive_type>::try_from(100u8).unwrap()), // row 3 is shredded, so it has a value
1671            ]));
1672        };
1673    }
1674
1675    macro_rules! partially_shredded_variant_array_gen {
1676        ($func:ident, $typed_array_gen: expr) => {
1677            fn $func() -> ArrayRef {
1678                // At the time of writing, the `VariantArrayBuilder` does not support shredding.
1679                // so we must construct the array manually.  see https://github.com/apache/arrow-rs/issues/7895
1680                let (metadata, string_value) = {
1681                    let mut builder = parquet_variant::VariantBuilder::new();
1682                    builder.append_value("n/a");
1683                    builder.finish()
1684                };
1685
1686                let nulls = NullBuffer::from(vec![
1687                    true,  // row 0 non null
1688                    false, // row 1 is null
1689                    true,  // row 2 non null
1690                    true,  // row 3 non null
1691                ]);
1692
1693                // metadata is the same for all rows
1694                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
1695
1696                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
1697                // about why row1 is an empty but non null, value.
1698                let values = BinaryViewArray::from(vec![
1699                    None,                // row 0 is shredded, so no value
1700                    Some(b"" as &[u8]),  // row 1 is null, so empty value (why?)
1701                    Some(&string_value), // copy the string value "N/A"
1702                    None,                // row 3 is shredded, so no value
1703                ]);
1704
1705                let typed_value = $typed_array_gen();
1706
1707                let struct_array = StructArrayBuilder::new()
1708                    .with_field("metadata", Arc::new(metadata), false)
1709                    .with_field("typed_value", Arc::new(typed_value), true)
1710                    .with_field("value", Arc::new(values), true)
1711                    .with_nulls(nulls)
1712                    .build();
1713
1714                ArrayRef::from(
1715                    VariantArray::try_new(&struct_array).expect("should create variant array"),
1716                )
1717            }
1718        };
1719    }
1720
    // Instantiate the numeric partially shredded fixture once per supported numeric type.
    // Each invocation defines a function named by its first argument; the typed column holds
    // 34 in row 0 and 100 in row 3.

    // 8-bit signed integer typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    // 16-bit signed integer typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    // 32-bit signed integer typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    // 64-bit signed integer typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    // 32-bit floating point typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    // 64-bit floating point typed column
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float64_variant_array,
        Float64Array,
        f64
    );
1751
1752    partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || {
1753        arrow::array::BooleanArray::from(vec![
1754            Some(true),  // row 0 is shredded, so it has a value
1755            None,        // row 1 is null, so no value
1756            None,        // row 2 is a string, so no typed value
1757            Some(false), // row 3 is shredded, so it has a value
1758        ])
1759    });
1760
1761    partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || {
1762        StringArray::from(vec![
1763            Some("hello"), // row 0 is shredded
1764            None,          // row 1 is null
1765            None,          // row 2 is a string
1766            Some("world"), // row 3 is shredded
1767        ])
1768    });
1769
1770    partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || {
1771        Date32Array::from(vec![
1772            Some(20348), // row 0 is shredded, 2025-09-17
1773            None,        // row 1 is null
1774            None,        // row 2 is a string, not a date
1775            Some(20340), // row 3 is shredded, 2025-09-09
1776        ])
1777    });
1778
1779    /// Return a VariantArray that represents an "all null" variant
1780    /// for the following example (3 null values):
1781    ///
1782    /// ```text
1783    /// null
1784    /// null
1785    /// null
1786    /// ```
1787    ///
1788    /// The schema of the corresponding `StructArray` would look like this:
1789    ///
1790    /// ```text
1791    /// StructArray {
1792    ///   metadata: BinaryViewArray,
1793    /// }
1794    /// ```
1795    fn all_null_variant_array() -> ArrayRef {
1796        let nulls = NullBuffer::from(vec![
1797            false, // row 0 is null
1798            false, // row 1 is null
1799            false, // row 2 is null
1800        ]);
1801
1802        // metadata is the same for all rows (though they're all null)
1803        let metadata =
1804            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
1805
1806        let struct_array = StructArrayBuilder::new()
1807            .with_field("metadata", Arc::new(metadata), false)
1808            .with_nulls(nulls)
1809            .build();
1810
1811        Arc::new(struct_array)
1812    }
1813    /// This test manually constructs a shredded variant array representing objects
1814    /// like {"x": 1, "y": "foo"} and {"x": 42} and tests extracting the "x" field
1815    /// as VariantArray using variant_get.
1816    #[test]
1817    fn test_shredded_object_field_access() {
1818        let array = shredded_object_with_x_field_variant_array();
1819
1820        // Test: Extract the "x" field as VariantArray first
1821        let options = GetOptions::new_with_path(VariantPath::from("x"));
1822        let result = variant_get(&array, options).unwrap();
1823
1824        let result_variant = VariantArray::try_new(&result).unwrap();
1825        assert_eq!(result_variant.len(), 2);
1826
1827        // Row 0: expect x=1
1828        assert_eq!(result_variant.value(0), Variant::Int32(1));
1829        // Row 1: expect x=42
1830        assert_eq!(result_variant.value(1), Variant::Int32(42));
1831    }
1832
1833    /// Test extracting shredded object field with type conversion
1834    #[test]
1835    fn test_shredded_object_field_as_int32() {
1836        let array = shredded_object_with_x_field_variant_array();
1837
1838        // Test: Extract the "x" field as Int32Array (type conversion)
1839        let field = Field::new("x", DataType::Int32, false);
1840        let options = GetOptions::new_with_path(VariantPath::from("x"))
1841            .with_as_type(Some(FieldRef::from(field)));
1842        let result = variant_get(&array, options).unwrap();
1843
1844        // Should get Int32Array
1845        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
1846        assert_eq!(&result, &expected);
1847    }
1848
1849    /// Helper function to create a shredded variant array representing objects
1850    ///
1851    /// This creates an array that represents:
1852    /// Row 0: {"x": 1, "y": "foo"}  (x is shredded, y is in value field)
1853    /// Row 1: {"x": 42}             (x is shredded, perfect shredding)
1854    ///
1855    /// The physical layout follows the shredding spec where:
1856    /// - metadata: contains object metadata
1857    /// - typed_value: StructArray with field "x" (ShreddedVariantFieldArray)
1858    /// - value: contains fallback for unshredded fields like {"y": "foo"}
1859    /// - The "x" field has typed_value=Int32Array and value=NULL (perfect shredding)
1860    fn shredded_object_with_x_field_variant_array() -> ArrayRef {
1861        // Create the base metadata for objects
1862        let (metadata, y_field_value) = {
1863            let mut builder = parquet_variant::VariantBuilder::new();
1864            let mut obj = builder.new_object();
1865            obj.insert("x", Variant::Int32(42));
1866            obj.insert("y", Variant::from("foo"));
1867            obj.finish();
1868            builder.finish()
1869        };
1870
1871        // Create metadata array (same for both rows)
1872        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
1873
1874        // Create the main value field per the 3-step shredding spec:
1875        // Step 2: If field not in shredding schema, check value field
1876        // Row 0: {"y": "foo"} (y is not shredded, stays in value for step 2)
1877        // Row 1: {} (empty object - no unshredded fields)
1878        let empty_object_value = {
1879            let mut builder = parquet_variant::VariantBuilder::new();
1880            let obj = builder.new_object();
1881            obj.finish();
1882            let (_, value) = builder.finish();
1883            value
1884        };
1885
1886        let value_array = BinaryViewArray::from(vec![
1887            Some(y_field_value.as_slice()),      // Row 0 has {"y": "foo"}
1888            Some(empty_object_value.as_slice()), // Row 1 has {}
1889        ]);
1890
1891        // Create the "x" field as a ShreddedVariantFieldArray
1892        // This represents the shredded Int32 values for the "x" field
1893        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
1894
1895        // For perfect shredding of the x field, no "value" column, only typed_value
1896        let x_field_struct = StructArrayBuilder::new()
1897            .with_field("typed_value", Arc::new(x_field_typed_value), true)
1898            .build();
1899
1900        // Wrap the x field struct in a ShreddedVariantFieldArray
1901        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1902            .expect("should create ShreddedVariantFieldArray");
1903
1904        // Create the main typed_value as a struct containing the "x" field
1905        let typed_value_fields = Fields::from(vec![Field::new(
1906            "x",
1907            x_field_shredded.data_type().clone(),
1908            true,
1909        )]);
1910        let typed_value_struct = StructArray::try_new(
1911            typed_value_fields,
1912            vec![ArrayRef::from(x_field_shredded)],
1913            None, // No nulls - both rows have the object structure
1914        )
1915        .unwrap();
1916
1917        // Create the main VariantArray
1918        let main_struct = StructArrayBuilder::new()
1919            .with_field("metadata", Arc::new(metadata_array), false)
1920            .with_field("value", Arc::new(value_array), true)
1921            .with_field("typed_value", Arc::new(typed_value_struct), true)
1922            .build();
1923
1924        Arc::new(main_struct)
1925    }
1926
1927    /// Simple test to check if nested paths are supported by current implementation
1928    #[test]
1929    fn test_simple_nested_path_support() {
1930        // Check: How does VariantPath parse different strings?
1931        println!("Testing path parsing:");
1932
1933        let path_x = VariantPath::from("x");
1934        let elements_x: Vec<_> = path_x.iter().collect();
1935        println!("  'x' -> {} elements: {:?}", elements_x.len(), elements_x);
1936
1937        let path_ax = VariantPath::from("a.x");
1938        let elements_ax: Vec<_> = path_ax.iter().collect();
1939        println!(
1940            "  'a.x' -> {} elements: {:?}",
1941            elements_ax.len(),
1942            elements_ax
1943        );
1944
1945        let path_ax_alt = VariantPath::from("$.a.x");
1946        let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect();
1947        println!(
1948            "  '$.a.x' -> {} elements: {:?}",
1949            elements_ax_alt.len(),
1950            elements_ax_alt
1951        );
1952
1953        let path_nested = VariantPath::from("a").join("x");
1954        let elements_nested: Vec<_> = path_nested.iter().collect();
1955        println!(
1956            "  VariantPath::from('a').join('x') -> {} elements: {:?}",
1957            elements_nested.len(),
1958            elements_nested
1959        );
1960
1961        // Use your existing simple test data but try "a.x" instead of "x"
1962        let array = shredded_object_with_x_field_variant_array();
1963
1964        // Test if variant_get with REAL nested path throws not implemented error
1965        let real_nested_path = VariantPath::from("a").join("x");
1966        let options = GetOptions::new_with_path(real_nested_path);
1967        let result = variant_get(&array, options);
1968
1969        match result {
1970            Ok(_) => {
1971                println!("Nested path 'a.x' works unexpectedly!");
1972            }
1973            Err(e) => {
1974                println!("Nested path 'a.x' error: {}", e);
1975                if e.to_string().contains("Not yet implemented")
1976                    || e.to_string().contains("NotYetImplemented")
1977                {
1978                    println!("This is expected - nested paths are not implemented");
1979                    return;
1980                }
1981                // Any other error is also expected for now
1982                println!("This shows nested paths need implementation");
1983            }
1984        }
1985    }
1986
1987    /// Test comprehensive variant_get scenarios with Int32 conversion
1988    /// Test depth 0: Direct field access "x" with Int32 conversion
1989    /// Covers shredded vs non-shredded VariantArrays for simple field access
1990    #[test]
1991    fn test_depth_0_int32_conversion() {
1992        println!("=== Testing Depth 0: Direct field access ===");
1993
1994        // Non-shredded test data: [{"x": 42}, {"x": "foo"}, {"y": 10}]
1995        let unshredded_array = create_depth_0_test_data();
1996
1997        let field = Field::new("result", DataType::Int32, true);
1998        let path = VariantPath::from("x");
1999        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2000        let result = variant_get(&unshredded_array, options).unwrap();
2001
2002        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2003            Some(42), // {"x": 42} -> 42
2004            None,     // {"x": "foo"} -> NULL (type mismatch)
2005            None,     // {"y": 10} -> NULL (field missing)
2006        ]));
2007        assert_eq!(&result, &expected);
2008        println!("Depth 0 (unshredded) passed");
2009
2010        // Shredded test data: using simplified approach based on working pattern
2011        let shredded_array = create_depth_0_shredded_test_data_simple();
2012
2013        let field = Field::new("result", DataType::Int32, true);
2014        let path = VariantPath::from("x");
2015        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2016        let result = variant_get(&shredded_array, options).unwrap();
2017
2018        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2019            Some(42), // {"x": 42} -> 42 (from typed_value)
2020            None,     // {"x": "foo"} -> NULL (type mismatch, from value field)
2021        ]));
2022        assert_eq!(&result, &expected);
2023        println!("Depth 0 (shredded) passed");
2024    }
2025
2026    /// Test depth 1: Single nested field access "a.x" with Int32 conversion
2027    /// Covers shredded vs non-shredded VariantArrays for nested field access
2028    #[test]
2029    fn test_depth_1_int32_conversion() {
2030        println!("=== Testing Depth 1: Single nested field access ===");
2031
2032        // Non-shredded test data from the GitHub issue
2033        let unshredded_array = create_nested_path_test_data();
2034
2035        let field = Field::new("result", DataType::Int32, true);
2036        let path = VariantPath::from("a.x"); // Dot notation!
2037        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2038        let result = variant_get(&unshredded_array, options).unwrap();
2039
2040        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2041            Some(55), // {"a": {"x": 55}} -> 55
2042            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch)
2043        ]));
2044        assert_eq!(&result, &expected);
2045        println!("Depth 1 (unshredded) passed");
2046
2047        // Shredded test data: depth 1 nested shredding
2048        let shredded_array = create_depth_1_shredded_test_data_working();
2049
2050        let field = Field::new("result", DataType::Int32, true);
2051        let path = VariantPath::from("a.x"); // Dot notation!
2052        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2053        let result = variant_get(&shredded_array, options).unwrap();
2054
2055        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2056            Some(55), // {"a": {"x": 55}} -> 55 (from nested shredded x)
2057            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch in nested value)
2058        ]));
2059        assert_eq!(&result, &expected);
2060        println!("Depth 1 (shredded) passed");
2061    }
2062
2063    /// Test depth 2: Double nested field access "a.b.x" with Int32 conversion  
2064    /// Covers shredded vs non-shredded VariantArrays for deeply nested field access
2065    #[test]
2066    fn test_depth_2_int32_conversion() {
2067        println!("=== Testing Depth 2: Double nested field access ===");
2068
2069        // Non-shredded test data: [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
2070        let unshredded_array = create_depth_2_test_data();
2071
2072        let field = Field::new("result", DataType::Int32, true);
2073        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
2074        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2075        let result = variant_get(&unshredded_array, options).unwrap();
2076
2077        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2078            Some(100), // {"a": {"b": {"x": 100}}} -> 100
2079            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch)
2080            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing)
2081        ]));
2082        assert_eq!(&result, &expected);
2083        println!("Depth 2 (unshredded) passed");
2084
2085        // Shredded test data: depth 2 nested shredding
2086        let shredded_array = create_depth_2_shredded_test_data_working();
2087
2088        let field = Field::new("result", DataType::Int32, true);
2089        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
2090        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2091        let result = variant_get(&shredded_array, options).unwrap();
2092
2093        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2094            Some(100), // {"a": {"b": {"x": 100}}} -> 100 (from deeply nested shredded x)
2095            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch in deep value)
2096            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing in deep structure)
2097        ]));
2098        assert_eq!(&result, &expected);
2099        println!("Depth 2 (shredded) passed");
2100    }
2101
2102    /// Test that demonstrates what CURRENTLY WORKS
2103    ///
2104    /// This shows that nested path functionality does work, but only when the
2105    /// test data matches what the current implementation expects
2106    #[test]
2107    fn test_current_nested_path_functionality() {
2108        let array = shredded_object_with_x_field_variant_array();
2109
2110        // Test: Extract the "x" field (single level) - this works
2111        let single_path = VariantPath::from("x");
2112        let field = Field::new("result", DataType::Int32, true);
2113        let options =
2114            GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field)));
2115        let result = variant_get(&array, options).unwrap();
2116
2117        println!("Single path 'x' works - result: {:?}", result);
2118
2119        // Test: Try nested path "a.x" - this is what we need to implement
2120        let nested_path = VariantPath::from("a").join("x");
2121        let field = Field::new("result", DataType::Int32, true);
2122        let options =
2123            GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field)));
2124        let result = variant_get(&array, options).unwrap();
2125
2126        println!("Nested path 'a.x' result: {:?}", result);
2127    }
2128
2129    /// Create test data for depth 0 (direct field access)
2130    /// [{"x": 42}, {"x": "foo"}, {"y": 10}]
2131    fn create_depth_0_test_data() -> ArrayRef {
2132        let mut builder = crate::VariantArrayBuilder::new(3);
2133
2134        // Row 1: {"x": 42}
2135        {
2136            let json_str = r#"{"x": 42}"#;
2137            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2138            if let Ok(variant_array) = json_to_variant(&string_array) {
2139                builder.append_variant(variant_array.value(0));
2140            } else {
2141                builder.append_null();
2142            }
2143        }
2144
2145        // Row 2: {"x": "foo"}
2146        {
2147            let json_str = r#"{"x": "foo"}"#;
2148            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2149            if let Ok(variant_array) = json_to_variant(&string_array) {
2150                builder.append_variant(variant_array.value(0));
2151            } else {
2152                builder.append_null();
2153            }
2154        }
2155
2156        // Row 3: {"y": 10} (missing "x" field)
2157        {
2158            let json_str = r#"{"y": 10}"#;
2159            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2160            if let Ok(variant_array) = json_to_variant(&string_array) {
2161                builder.append_variant(variant_array.value(0));
2162            } else {
2163                builder.append_null();
2164            }
2165        }
2166
2167        ArrayRef::from(builder.build())
2168    }
2169
2170    /// Create test data for depth 1 (single nested field)
2171    /// This represents the exact scenarios from the GitHub issue: "a.x"
2172    fn create_nested_path_test_data() -> ArrayRef {
2173        let mut builder = crate::VariantArrayBuilder::new(2);
2174
2175        // Row 1: {"a": {"x": 55}, "b": 42}
2176        {
2177            let json_str = r#"{"a": {"x": 55}, "b": 42}"#;
2178            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2179            if let Ok(variant_array) = json_to_variant(&string_array) {
2180                builder.append_variant(variant_array.value(0));
2181            } else {
2182                builder.append_null();
2183            }
2184        }
2185
2186        // Row 2: {"a": {"x": "foo"}, "b": 42}
2187        {
2188            let json_str = r#"{"a": {"x": "foo"}, "b": 42}"#;
2189            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2190            if let Ok(variant_array) = json_to_variant(&string_array) {
2191                builder.append_variant(variant_array.value(0));
2192            } else {
2193                builder.append_null();
2194            }
2195        }
2196
2197        ArrayRef::from(builder.build())
2198    }
2199
2200    /// Create test data for depth 2 (double nested field)
2201    /// [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
2202    fn create_depth_2_test_data() -> ArrayRef {
2203        let mut builder = crate::VariantArrayBuilder::new(3);
2204
2205        // Row 1: {"a": {"b": {"x": 100}}}
2206        {
2207            let json_str = r#"{"a": {"b": {"x": 100}}}"#;
2208            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2209            if let Ok(variant_array) = json_to_variant(&string_array) {
2210                builder.append_variant(variant_array.value(0));
2211            } else {
2212                builder.append_null();
2213            }
2214        }
2215
2216        // Row 2: {"a": {"b": {"x": "bar"}}}
2217        {
2218            let json_str = r#"{"a": {"b": {"x": "bar"}}}"#;
2219            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2220            if let Ok(variant_array) = json_to_variant(&string_array) {
2221                builder.append_variant(variant_array.value(0));
2222            } else {
2223                builder.append_null();
2224            }
2225        }
2226
2227        // Row 3: {"a": {"b": {"y": 200}}} (missing "x" field)
2228        {
2229            let json_str = r#"{"a": {"b": {"y": 200}}}"#;
2230            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2231            if let Ok(variant_array) = json_to_variant(&string_array) {
2232                builder.append_variant(variant_array.value(0));
2233            } else {
2234                builder.append_null();
2235            }
2236        }
2237
2238        ArrayRef::from(builder.build())
2239    }
2240
2241    /// Create simple shredded test data for depth 0 using a simplified working pattern
2242    /// Creates 2 rows: [{"x": 42}, {"x": "foo"}] with "x" shredded where possible
2243    fn create_depth_0_shredded_test_data_simple() -> ArrayRef {
2244        // Create base metadata using the working pattern
2245        let (metadata, string_x_value) = {
2246            let mut builder = parquet_variant::VariantBuilder::new();
2247            let mut obj = builder.new_object();
2248            obj.insert("x", Variant::from("foo"));
2249            obj.finish();
2250            builder.finish()
2251        };
2252
2253        // Metadata array (same for both rows)
2254        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2255
2256        // Value array following the 3-step shredding spec:
2257        // Row 0: {} (x is shredded, no unshredded fields)
2258        // Row 1: {"x": "foo"} (x is a string, can't be shredded to Int32)
2259        let empty_object_value = {
2260            let mut builder = parquet_variant::VariantBuilder::new();
2261            let obj = builder.new_object();
2262            obj.finish();
2263            let (_, value) = builder.finish();
2264            value
2265        };
2266
2267        let value_array = BinaryViewArray::from(vec![
2268            Some(empty_object_value.as_slice()), // Row 0: {} (x shredded out)
2269            Some(string_x_value.as_slice()),     // Row 1: {"x": "foo"} (fallback)
2270        ]);
2271
2272        // Create the "x" field as a ShreddedVariantFieldArray
2273        let x_field_typed_value = Int32Array::from(vec![Some(42), None]);
2274
2275        // For the x field, only typed_value (perfect shredding when possible)
2276        let x_field_struct = StructArrayBuilder::new()
2277            .with_field("typed_value", Arc::new(x_field_typed_value), true)
2278            .build();
2279
2280        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2281            .expect("should create ShreddedVariantFieldArray");
2282
2283        // Create the main typed_value as a struct containing the "x" field
2284        let typed_value_fields = Fields::from(vec![Field::new(
2285            "x",
2286            x_field_shredded.data_type().clone(),
2287            true,
2288        )]);
2289        let typed_value_struct = StructArray::try_new(
2290            typed_value_fields,
2291            vec![ArrayRef::from(x_field_shredded)],
2292            None,
2293        )
2294        .unwrap();
2295
2296        // Build final VariantArray
2297        let struct_array = StructArrayBuilder::new()
2298            .with_field("metadata", Arc::new(metadata_array), false)
2299            .with_field("value", Arc::new(value_array), true)
2300            .with_field("typed_value", Arc::new(typed_value_struct), true)
2301            .build();
2302
2303        Arc::new(struct_array)
2304    }
2305
2306    /// Create working depth 1 shredded test data based on the existing working pattern
2307    /// This creates a properly structured shredded variant for "a.x" where:
2308    /// - Row 0: {"a": {"x": 55}, "b": 42} with a.x shredded into typed_value
2309    /// - Row 1: {"a": {"x": "foo"}, "b": 42} with a.x fallback to value field due to type mismatch
2310    fn create_depth_1_shredded_test_data_working() -> ArrayRef {
2311        // Create metadata following the working pattern from shredded_object_with_x_field_variant_array
2312        let (metadata, _) = {
2313            // Create nested structure: {"a": {"x": 55}, "b": 42}
2314            let mut builder = parquet_variant::VariantBuilder::new();
2315            let mut obj = builder.new_object();
2316
2317            // Create the nested "a" object
2318            let mut a_obj = obj.new_object("a");
2319            a_obj.insert("x", Variant::Int32(55));
2320            a_obj.finish();
2321
2322            obj.insert("b", Variant::Int32(42));
2323            obj.finish();
2324            builder.finish()
2325        };
2326
2327        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2328
2329        // Create value arrays for the fallback case
2330        // Following the spec: if field cannot be shredded, it stays in value
2331        let empty_object_value = {
2332            let mut builder = parquet_variant::VariantBuilder::new();
2333            let obj = builder.new_object();
2334            obj.finish();
2335            let (_, value) = builder.finish();
2336            value
2337        };
2338
2339        // Row 1 fallback: use the working pattern from the existing shredded test
2340        // This avoids metadata issues by using the simple fallback approach
2341        let row1_fallback = {
2342            let mut builder = parquet_variant::VariantBuilder::new();
2343            let mut obj = builder.new_object();
2344            obj.insert("fallback", Variant::from("data"));
2345            obj.finish();
2346            let (_, value) = builder.finish();
2347            value
2348        };
2349
2350        let value_array = BinaryViewArray::from(vec![
2351            Some(empty_object_value.as_slice()), // Row 0: {} (everything shredded except b in unshredded fields)
2352            Some(row1_fallback.as_slice()), // Row 1: {"a": {"x": "foo"}, "b": 42} (a.x can't be shredded)
2353        ]);
2354
2355        // Create the nested shredded structure
2356        // Level 2: x field (the deepest level)
2357        let x_typed_value = Int32Array::from(vec![Some(55), None]);
2358        let x_field_struct = StructArrayBuilder::new()
2359            .with_field("typed_value", Arc::new(x_typed_value), true)
2360            .build();
2361        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2362            .expect("should create ShreddedVariantFieldArray for x");
2363
2364        // Level 1: a field containing x field + value field for fallbacks
2365        // The "a" field needs both typed_value (for shredded x) and value (for fallback cases)
2366
2367        // Create the value field for "a" (for cases where a.x can't be shredded)
2368        let a_value_data = {
2369            let mut builder = parquet_variant::VariantBuilder::new();
2370            let obj = builder.new_object();
2371            obj.finish();
2372            let (_, value) = builder.finish();
2373            value
2374        };
2375        let a_value_array = BinaryViewArray::from(vec![
2376            None,                          // Row 0: x is shredded, so no value fallback needed
2377            Some(a_value_data.as_slice()), // Row 1: fallback for a.x="foo" (but logic will check typed_value first)
2378        ]);
2379
2380        let a_inner_fields = Fields::from(vec![Field::new(
2381            "x",
2382            x_field_shredded.data_type().clone(),
2383            true,
2384        )]);
2385        let a_inner_struct = StructArrayBuilder::new()
2386            .with_field(
2387                "typed_value",
2388                Arc::new(
2389                    StructArray::try_new(
2390                        a_inner_fields,
2391                        vec![ArrayRef::from(x_field_shredded)],
2392                        None,
2393                    )
2394                    .unwrap(),
2395                ),
2396                true,
2397            )
2398            .with_field("value", Arc::new(a_value_array), true)
2399            .build();
2400        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
2401            .expect("should create ShreddedVariantFieldArray for a");
2402
2403        // Level 0: main typed_value struct containing a field
2404        let typed_value_fields = Fields::from(vec![Field::new(
2405            "a",
2406            a_field_shredded.data_type().clone(),
2407            true,
2408        )]);
2409        let typed_value_struct = StructArray::try_new(
2410            typed_value_fields,
2411            vec![ArrayRef::from(a_field_shredded)],
2412            None,
2413        )
2414        .unwrap();
2415
2416        // Build final VariantArray
2417        let struct_array = StructArrayBuilder::new()
2418            .with_field("metadata", Arc::new(metadata_array), false)
2419            .with_field("value", Arc::new(value_array), true)
2420            .with_field("typed_value", Arc::new(typed_value_struct), true)
2421            .build();
2422
2423        Arc::new(struct_array)
2424    }
2425
2426    /// Create working depth 2 shredded test data for "a.b.x" paths
2427    /// This creates a 3-level nested shredded structure where:
2428    /// - Row 0: {"a": {"b": {"x": 100}}} with a.b.x shredded into typed_value
2429    /// - Row 1: {"a": {"b": {"x": "bar"}}} with type mismatch fallback
2430    /// - Row 2: {"a": {"b": {"y": 200}}} with missing field fallback
2431    fn create_depth_2_shredded_test_data_working() -> ArrayRef {
2432        // Create metadata following the working pattern
2433        let (metadata, _) = {
2434            // Create deeply nested structure: {"a": {"b": {"x": 100}}}
2435            let mut builder = parquet_variant::VariantBuilder::new();
2436            let mut obj = builder.new_object();
2437
2438            // Create the nested "a.b" structure
2439            let mut a_obj = obj.new_object("a");
2440            let mut b_obj = a_obj.new_object("b");
2441            b_obj.insert("x", Variant::Int32(100));
2442            b_obj.finish();
2443            a_obj.finish();
2444
2445            obj.finish();
2446            builder.finish()
2447        };
2448
2449        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
2450
2451        // Create value arrays for fallback cases
2452        let empty_object_value = {
2453            let mut builder = parquet_variant::VariantBuilder::new();
2454            let obj = builder.new_object();
2455            obj.finish();
2456            let (_, value) = builder.finish();
2457            value
2458        };
2459
2460        // Simple fallback values - avoiding complex nested metadata
2461        let value_array = BinaryViewArray::from(vec![
2462            Some(empty_object_value.as_slice()), // Row 0: fully shredded
2463            Some(empty_object_value.as_slice()), // Row 1: fallback (simplified)
2464            Some(empty_object_value.as_slice()), // Row 2: fallback (simplified)
2465        ]);
2466
2467        // Create the deeply nested shredded structure: a.b.x
2468
2469        // Level 3: x field (deepest level)
2470        let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
2471        let x_field_struct = StructArrayBuilder::new()
2472            .with_field("typed_value", Arc::new(x_typed_value), true)
2473            .build();
2474        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2475            .expect("should create ShreddedVariantFieldArray for x");
2476
2477        // Level 2: b field containing x field + value field
2478        let b_value_data = {
2479            let mut builder = parquet_variant::VariantBuilder::new();
2480            let obj = builder.new_object();
2481            obj.finish();
2482            let (_, value) = builder.finish();
2483            value
2484        };
2485        let b_value_array = BinaryViewArray::from(vec![
2486            None,                          // Row 0: x is shredded
2487            Some(b_value_data.as_slice()), // Row 1: fallback for b.x="bar"
2488            Some(b_value_data.as_slice()), // Row 2: fallback for b.y=200
2489        ]);
2490
2491        let b_inner_fields = Fields::from(vec![Field::new(
2492            "x",
2493            x_field_shredded.data_type().clone(),
2494            true,
2495        )]);
2496        let b_inner_struct = StructArrayBuilder::new()
2497            .with_field(
2498                "typed_value",
2499                Arc::new(
2500                    StructArray::try_new(
2501                        b_inner_fields,
2502                        vec![ArrayRef::from(x_field_shredded)],
2503                        None,
2504                    )
2505                    .unwrap(),
2506                ),
2507                true,
2508            )
2509            .with_field("value", Arc::new(b_value_array), true)
2510            .build();
2511        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct)
2512            .expect("should create ShreddedVariantFieldArray for b");
2513
2514        // Level 1: a field containing b field + value field
2515        let a_value_data = {
2516            let mut builder = parquet_variant::VariantBuilder::new();
2517            let obj = builder.new_object();
2518            obj.finish();
2519            let (_, value) = builder.finish();
2520            value
2521        };
2522        let a_value_array = BinaryViewArray::from(vec![
2523            None,                          // Row 0: b is shredded
2524            Some(a_value_data.as_slice()), // Row 1: fallback for a.b.*
2525            Some(a_value_data.as_slice()), // Row 2: fallback for a.b.*
2526        ]);
2527
2528        let a_inner_fields = Fields::from(vec![Field::new(
2529            "b",
2530            b_field_shredded.data_type().clone(),
2531            true,
2532        )]);
2533        let a_inner_struct = StructArrayBuilder::new()
2534            .with_field(
2535                "typed_value",
2536                Arc::new(
2537                    StructArray::try_new(
2538                        a_inner_fields,
2539                        vec![ArrayRef::from(b_field_shredded)],
2540                        None,
2541                    )
2542                    .unwrap(),
2543                ),
2544                true,
2545            )
2546            .with_field("value", Arc::new(a_value_array), true)
2547            .build();
2548        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
2549            .expect("should create ShreddedVariantFieldArray for a");
2550
2551        // Level 0: main typed_value struct containing a field
2552        let typed_value_fields = Fields::from(vec![Field::new(
2553            "a",
2554            a_field_shredded.data_type().clone(),
2555            true,
2556        )]);
2557        let typed_value_struct = StructArray::try_new(
2558            typed_value_fields,
2559            vec![ArrayRef::from(a_field_shredded)],
2560            None,
2561        )
2562        .unwrap();
2563
2564        // Build final VariantArray
2565        let struct_array = StructArrayBuilder::new()
2566            .with_field("metadata", Arc::new(metadata_array), false)
2567            .with_field("value", Arc::new(value_array), true)
2568            .with_field("typed_value", Arc::new(typed_value_struct), true)
2569            .build();
2570
2571        Arc::new(struct_array)
2572    }
2573
2574    #[test]
2575    fn test_strict_cast_options_downcast_failure() {
2576        use arrow::compute::CastOptions;
2577        use arrow::datatypes::{DataType, Field};
2578        use arrow::error::ArrowError;
2579        use parquet_variant::VariantPath;
2580        use std::sync::Arc;
2581
2582        // Use the existing simple test data that has Int32 as typed_value
2583        let variant_array = perfectly_shredded_int32_variant_array();
2584
2585        // Try to access a field with safe cast options (should return NULLs)
2586        let safe_options = GetOptions {
2587            path: VariantPath::from("nonexistent_field"),
2588            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2589            cast_options: CastOptions::default(), // safe = true
2590        };
2591
2592        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2593        let result = variant_get(&variant_array_ref, safe_options);
2594        // Should succeed and return NULLs (safe behavior)
2595        assert!(result.is_ok());
2596        let result_array = result.unwrap();
2597        assert_eq!(result_array.len(), 3);
2598        assert!(result_array.is_null(0));
2599        assert!(result_array.is_null(1));
2600        assert!(result_array.is_null(2));
2601
2602        // Try to access a field with strict cast options (should error)
2603        let strict_options = GetOptions {
2604            path: VariantPath::from("nonexistent_field"),
2605            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2606            cast_options: CastOptions {
2607                safe: false,
2608                ..Default::default()
2609            },
2610        };
2611
2612        let result = variant_get(&variant_array_ref, strict_options);
2613        // Should fail with a cast error
2614        assert!(result.is_err());
2615        let error = result.unwrap_err();
2616        assert!(matches!(error, ArrowError::CastError(_)));
2617        assert!(
2618            error
2619                .to_string()
2620                .contains("Cannot access field 'nonexistent_field' on non-struct type")
2621        );
2622    }
2623
2624    #[test]
2625    fn test_error_message_boolean_type_display() {
2626        let mut builder = VariantArrayBuilder::new(1);
2627        builder.append_variant(Variant::Int32(123));
2628        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2629
2630        // Request Boolean with strict casting to force an error
2631        let options = GetOptions {
2632            path: VariantPath::default(),
2633            as_type: Some(Arc::new(Field::new("result", DataType::Boolean, true))),
2634            cast_options: CastOptions {
2635                safe: false,
2636                ..Default::default()
2637            },
2638        };
2639
2640        let err = variant_get(&variant_array, options).unwrap_err();
2641        let msg = err.to_string();
2642        assert!(msg.contains("Failed to extract primitive of type Boolean"));
2643    }
2644
2645    #[test]
2646    fn test_error_message_numeric_type_display() {
2647        let mut builder = VariantArrayBuilder::new(1);
2648        builder.append_variant(Variant::BooleanTrue);
2649        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2650
2651        // Request Boolean with strict casting to force an error
2652        let options = GetOptions {
2653            path: VariantPath::default(),
2654            as_type: Some(Arc::new(Field::new("result", DataType::Float32, true))),
2655            cast_options: CastOptions {
2656                safe: false,
2657                ..Default::default()
2658            },
2659        };
2660
2661        let err = variant_get(&variant_array, options).unwrap_err();
2662        let msg = err.to_string();
2663        assert!(msg.contains("Failed to extract primitive of type Float32"));
2664    }
2665
2666    #[test]
2667    fn test_error_message_temporal_type_display() {
2668        let mut builder = VariantArrayBuilder::new(1);
2669        builder.append_variant(Variant::BooleanFalse);
2670        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2671
2672        // Request Boolean with strict casting to force an error
2673        let options = GetOptions {
2674            path: VariantPath::default(),
2675            as_type: Some(Arc::new(Field::new(
2676                "result",
2677                DataType::Timestamp(TimeUnit::Nanosecond, None),
2678                true,
2679            ))),
2680            cast_options: CastOptions {
2681                safe: false,
2682                ..Default::default()
2683            },
2684        };
2685
2686        let err = variant_get(&variant_array, options).unwrap_err();
2687        let msg = err.to_string();
2688        assert!(msg.contains("Failed to extract primitive of type Timestamp(ns)"));
2689    }
2690
2691    #[test]
2692    fn test_null_buffer_union_for_shredded_paths() {
2693        use arrow::compute::CastOptions;
2694        use arrow::datatypes::{DataType, Field};
2695        use parquet_variant::VariantPath;
2696        use std::sync::Arc;
2697
2698        // Test that null buffers are properly unioned when traversing shredded paths
2699        // This test verifies scovich's null buffer union requirement
2700
2701        // Create a depth-1 shredded variant array where:
2702        // - The top-level variant array has some nulls
2703        // - The nested typed_value also has some nulls
2704        // - The result should be the union of both null buffers
2705
2706        let variant_array = create_depth_1_shredded_test_data_working();
2707
2708        // Get the field "x" which should union nulls from:
2709        // 1. The top-level variant array nulls
2710        // 2. The "a" field's typed_value nulls
2711        // 3. The "x" field's typed_value nulls
2712        let options = GetOptions {
2713            path: VariantPath::from("a.x"),
2714            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2715            cast_options: CastOptions::default(),
2716        };
2717
2718        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2719        let result = variant_get(&variant_array_ref, options).unwrap();
2720
2721        // Verify the result length matches input
2722        assert_eq!(result.len(), variant_array.len());
2723
2724        // The null pattern should reflect the union of all ancestor nulls
2725        // Row 0: Should have valid data (path exists and is shredded as Int32)
2726        // Row 1: Should be null (due to type mismatch - "foo" can't cast to Int32)
2727        assert!(!result.is_null(0), "Row 0 should have valid Int32 data");
2728        assert!(
2729            result.is_null(1),
2730            "Row 1 should be null due to type casting failure"
2731        );
2732
2733        // Verify the actual values
2734        let int32_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
2735        assert_eq!(int32_result.value(0), 55); // The valid Int32 value
2736    }
2737
2738    #[test]
2739    fn test_struct_null_mask_union_from_children() {
2740        use arrow::compute::CastOptions;
2741        use arrow::datatypes::{DataType, Field, Fields};
2742        use parquet_variant::VariantPath;
2743        use std::sync::Arc;
2744
2745        use arrow::array::StringArray;
2746
2747        // Test that struct null masks properly union nulls from children field extractions
2748        // This verifies scovich's concern about incomplete null masks in struct construction
2749
2750        // Create test data where some fields will fail type casting
2751        let json_strings = vec![
2752            r#"{"a": 42, "b": "hello"}"#, // Row 0: a=42 (castable to int), b="hello" (not castable to int)
2753            r#"{"a": "world", "b": 100}"#, // Row 1: a="world" (not castable to int), b=100 (castable to int)
2754            r#"{"a": 55, "b": 77}"#,       // Row 2: a=55 (castable to int), b=77 (castable to int)
2755        ];
2756
2757        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
2758        let variant_array = json_to_variant(&string_array).unwrap();
2759
2760        // Request extraction as a struct with both fields as Int32
2761        // This should create child arrays where some fields are null due to casting failures
2762        let struct_fields = Fields::from(vec![
2763            Field::new("a", DataType::Int32, true),
2764            Field::new("b", DataType::Int32, true),
2765        ]);
2766        let struct_type = DataType::Struct(struct_fields);
2767
2768        let options = GetOptions {
2769            path: VariantPath::default(), // Extract the whole object as struct
2770            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
2771            cast_options: CastOptions::default(),
2772        };
2773
2774        let variant_array_ref = ArrayRef::from(variant_array);
2775        let result = variant_get(&variant_array_ref, options).unwrap();
2776
2777        // Verify the result is a StructArray
2778        let struct_result = result.as_struct();
2779        assert_eq!(struct_result.len(), 3);
2780
2781        // Get the individual field arrays
2782        let field_a = struct_result
2783            .column(0)
2784            .as_any()
2785            .downcast_ref::<Int32Array>()
2786            .unwrap();
2787        let field_b = struct_result
2788            .column(1)
2789            .as_any()
2790            .downcast_ref::<Int32Array>()
2791            .unwrap();
2792
2793        // Verify field values and nulls
2794        // Row 0: a=42 (valid), b=null (casting failure)
2795        assert!(!field_a.is_null(0));
2796        assert_eq!(field_a.value(0), 42);
2797        assert!(field_b.is_null(0)); // "hello" can't cast to int
2798
2799        // Row 1: a=null (casting failure), b=100 (valid)
2800        assert!(field_a.is_null(1)); // "world" can't cast to int
2801        assert!(!field_b.is_null(1));
2802        assert_eq!(field_b.value(1), 100);
2803
2804        // Row 2: a=55 (valid), b=77 (valid)
2805        assert!(!field_a.is_null(2));
2806        assert_eq!(field_a.value(2), 55);
2807        assert!(!field_b.is_null(2));
2808        assert_eq!(field_b.value(2), 77);
2809
2810        // Verify the struct-level null mask properly unions child nulls
2811        // The struct should NOT be null in any row because each row has at least one valid field
2812        // (This tests that we're not incorrectly making the entire struct null when children fail)
2813        assert!(!struct_result.is_null(0)); // Has valid field 'a'
2814        assert!(!struct_result.is_null(1)); // Has valid field 'b'
2815        assert!(!struct_result.is_null(2)); // Has both valid fields
2816    }
2817
2818    #[test]
2819    fn test_field_nullability_preservation() {
2820        use arrow::compute::CastOptions;
2821        use arrow::datatypes::{DataType, Field};
2822        use parquet_variant::VariantPath;
2823        use std::sync::Arc;
2824
2825        use arrow::array::StringArray;
2826
2827        // Test that field nullability from GetOptions.as_type is preserved in the result
2828
2829        let json_strings = vec![
2830            r#"{"x": 42}"#,                  // Row 0: Valid int that should convert to Int32
2831            r#"{"x": "not_a_number"}"#,      // Row 1: String that can't cast to Int32
2832            r#"{"x": null}"#,                // Row 2: Explicit null value
2833            r#"{"x": "hello"}"#,             // Row 3: Another string (wrong type)
2834            r#"{"y": 100}"#,                 // Row 4: Missing "x" field (SQL NULL case)
2835            r#"{"x": 127}"#, // Row 5: Small int (could be Int8, widening cast candidate)
2836            r#"{"x": 32767}"#, // Row 6: Medium int (could be Int16, widening cast candidate)
2837            r#"{"x": 2147483647}"#, // Row 7: Max Int32 value (fits in Int32)
2838            r#"{"x": 9223372036854775807}"#, // Row 8: Large Int64 value (cannot convert to Int32)
2839        ];
2840
2841        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
2842        let variant_array = json_to_variant(&string_array).unwrap();
2843
2844        // Test 1: nullable field (should allow nulls from cast failures)
2845        let nullable_field = Arc::new(Field::new("result", DataType::Int32, true));
2846        let options_nullable = GetOptions {
2847            path: VariantPath::from("x"),
2848            as_type: Some(nullable_field.clone()),
2849            cast_options: CastOptions::default(),
2850        };
2851
2852        let variant_array_ref = ArrayRef::from(variant_array);
2853        let result_nullable = variant_get(&variant_array_ref, options_nullable).unwrap();
2854
2855        // Verify we get an Int32Array with nulls for cast failures
2856        let int32_result = result_nullable
2857            .as_any()
2858            .downcast_ref::<Int32Array>()
2859            .unwrap();
2860        assert_eq!(int32_result.len(), 9);
2861
2862        // Row 0: 42 converts successfully to Int32
2863        assert!(!int32_result.is_null(0));
2864        assert_eq!(int32_result.value(0), 42);
2865
2866        // Row 1: "not_a_number" fails to convert -> NULL
2867        assert!(int32_result.is_null(1));
2868
2869        // Row 2: explicit null value -> NULL
2870        assert!(int32_result.is_null(2));
2871
2872        // Row 3: "hello" (wrong type) fails to convert -> NULL
2873        assert!(int32_result.is_null(3));
2874
2875        // Row 4: missing "x" field (SQL NULL case) -> NULL
2876        assert!(int32_result.is_null(4));
2877
2878        // Row 5: 127 (small int, potential Int8 -> Int32 widening)
2879        // Current behavior: JSON parses to Int8, should convert to Int32
2880        assert!(!int32_result.is_null(5));
2881        assert_eq!(int32_result.value(5), 127);
2882
2883        // Row 6: 32767 (medium int, potential Int16 -> Int32 widening)
2884        // Current behavior: JSON parses to Int16, should convert to Int32
2885        assert!(!int32_result.is_null(6));
2886        assert_eq!(int32_result.value(6), 32767);
2887
2888        // Row 7: 2147483647 (max Int32, fits exactly)
2889        // Current behavior: Should convert successfully
2890        assert!(!int32_result.is_null(7));
2891        assert_eq!(int32_result.value(7), 2147483647);
2892
2893        // Row 8: 9223372036854775807 (large Int64, cannot fit in Int32)
2894        // Current behavior: Should fail conversion -> NULL
2895        assert!(int32_result.is_null(8));
2896
2897        // Test 2: non-nullable field (behavior should be the same with safe casting)
2898        let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false));
2899        let options_non_nullable = GetOptions {
2900            path: VariantPath::from("x"),
2901            as_type: Some(non_nullable_field.clone()),
2902            cast_options: CastOptions::default(), // safe=true by default
2903        };
2904
2905        // Create variant array again since we moved it
2906        let variant_array_2 = json_to_variant(&string_array).unwrap();
2907        let variant_array_ref_2 = ArrayRef::from(variant_array_2);
2908        let result_non_nullable = variant_get(&variant_array_ref_2, options_non_nullable).unwrap();
2909        let int32_result_2 = result_non_nullable
2910            .as_any()
2911            .downcast_ref::<Int32Array>()
2912            .unwrap();
2913
2914        // Even with a non-nullable field, safe casting should still produce nulls for failures
2915        assert_eq!(int32_result_2.len(), 9);
2916
2917        // Row 0: 42 converts successfully to Int32
2918        assert!(!int32_result_2.is_null(0));
2919        assert_eq!(int32_result_2.value(0), 42);
2920
2921        // Rows 1-4: All should be null due to safe casting behavior
2922        // (non-nullable field specification doesn't override safe casting behavior)
2923        assert!(int32_result_2.is_null(1)); // "not_a_number"
2924        assert!(int32_result_2.is_null(2)); // explicit null
2925        assert!(int32_result_2.is_null(3)); // "hello"
2926        assert!(int32_result_2.is_null(4)); // missing field
2927
2928        // Rows 5-7: These should also convert successfully (numeric widening/fitting)
2929        assert!(!int32_result_2.is_null(5)); // 127 (Int8 -> Int32)
2930        assert_eq!(int32_result_2.value(5), 127);
2931        assert!(!int32_result_2.is_null(6)); // 32767 (Int16 -> Int32)
2932        assert_eq!(int32_result_2.value(6), 32767);
2933        assert!(!int32_result_2.is_null(7)); // 2147483647 (fits in Int32)
2934        assert_eq!(int32_result_2.value(7), 2147483647);
2935
2936        // Row 8: Large Int64 should fail conversion -> NULL
2937        assert!(int32_result_2.is_null(8)); // 9223372036854775807 (too large for Int32)
2938    }
2939
2940    #[test]
2941    fn test_struct_extraction_subset_superset_schema_perfectly_shredded() {
2942        // Create variant with diverse null patterns and empty objects
2943        let variant_array = create_comprehensive_shredded_variant();
2944
2945        // Request struct with fields "a", "b", "d" (skip existing "c", add missing "d")
2946        let struct_fields = Fields::from(vec![
2947            Field::new("a", DataType::Int32, true),
2948            Field::new("b", DataType::Int32, true),
2949            Field::new("d", DataType::Int32, true),
2950        ]);
2951        let struct_type = DataType::Struct(struct_fields);
2952
2953        let options = GetOptions {
2954            path: VariantPath::default(),
2955            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
2956            cast_options: CastOptions::default(),
2957        };
2958
2959        let result = variant_get(&variant_array, options).unwrap();
2960
2961        // Verify the result is a StructArray with 3 fields and 5 rows
2962        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
2963        assert_eq!(struct_result.len(), 5);
2964        assert_eq!(struct_result.num_columns(), 3);
2965
2966        let field_a = struct_result
2967            .column(0)
2968            .as_any()
2969            .downcast_ref::<Int32Array>()
2970            .unwrap();
2971        let field_b = struct_result
2972            .column(1)
2973            .as_any()
2974            .downcast_ref::<Int32Array>()
2975            .unwrap();
2976        let field_d = struct_result
2977            .column(2)
2978            .as_any()
2979            .downcast_ref::<Int32Array>()
2980            .unwrap();
2981
2982        // Row 0: Normal values {"a": 1, "b": 2, "c": 3} → {a: 1, b: 2, d: NULL}
2983        assert!(!struct_result.is_null(0));
2984        assert_eq!(field_a.value(0), 1);
2985        assert_eq!(field_b.value(0), 2);
2986        assert!(field_d.is_null(0)); // Missing field "d"
2987
2988        // Row 1: Top-level NULL → struct-level NULL
2989        assert!(struct_result.is_null(1));
2990
2991        // Row 2: Field "a" missing → {a: NULL, b: 2, d: NULL}
2992        assert!(!struct_result.is_null(2));
2993        assert!(field_a.is_null(2)); // Missing field "a"
2994        assert_eq!(field_b.value(2), 2);
2995        assert!(field_d.is_null(2)); // Missing field "d"
2996
2997        // Row 3: Field "b" missing → {a: 1, b: NULL, d: NULL}
2998        assert!(!struct_result.is_null(3));
2999        assert_eq!(field_a.value(3), 1);
3000        assert!(field_b.is_null(3)); // Missing field "b"
3001        assert!(field_d.is_null(3)); // Missing field "d"
3002
3003        // Row 4: Empty object {} → {a: NULL, b: NULL, d: NULL}
3004        assert!(!struct_result.is_null(4));
3005        assert!(field_a.is_null(4)); // Empty object
3006        assert!(field_b.is_null(4)); // Empty object
3007        assert!(field_d.is_null(4)); // Missing field "d"
3008    }
3009
3010    #[test]
3011    fn test_nested_struct_extraction_perfectly_shredded() {
3012        // Create nested variant with diverse null patterns
3013        let variant_array = create_comprehensive_nested_shredded_variant();
3014        println!("variant_array: {variant_array:?}");
3015
3016        // Request 3-level nested struct type {"outer": {"inner": INT}}
3017        let inner_field = Field::new("inner", DataType::Int32, true);
3018        let inner_type = DataType::Struct(Fields::from(vec![inner_field]));
3019        let outer_field = Field::new("outer", inner_type, true);
3020        let result_type = DataType::Struct(Fields::from(vec![outer_field]));
3021
3022        let options = GetOptions {
3023            path: VariantPath::default(),
3024            as_type: Some(Arc::new(Field::new("result", result_type, true))),
3025            cast_options: CastOptions::default(),
3026        };
3027
3028        let result = variant_get(&variant_array, options).unwrap();
3029        println!("result: {result:?}");
3030
3031        // Verify the result is a StructArray with "outer" field and 4 rows
3032        let outer_struct = result.as_any().downcast_ref::<StructArray>().unwrap();
3033        assert_eq!(outer_struct.len(), 4);
3034        assert_eq!(outer_struct.num_columns(), 1);
3035
3036        // Get the "inner" struct column
3037        let inner_struct = outer_struct
3038            .column(0)
3039            .as_any()
3040            .downcast_ref::<StructArray>()
3041            .unwrap();
3042        assert_eq!(inner_struct.num_columns(), 1);
3043
3044        // Get the "leaf" field (Int32 values)
3045        let leaf_field = inner_struct
3046            .column(0)
3047            .as_any()
3048            .downcast_ref::<Int32Array>()
3049            .unwrap();
3050
3051        // Row 0: Normal nested {"outer": {"inner": {"leaf": 42}}}
3052        assert!(!outer_struct.is_null(0));
3053        assert!(!inner_struct.is_null(0));
3054        assert_eq!(leaf_field.value(0), 42);
3055
3056        // Row 1: "inner" field missing → {outer: {inner: NULL}}
3057        assert!(!outer_struct.is_null(1));
3058        assert!(!inner_struct.is_null(1)); // outer exists, inner exists but leaf is NULL
3059        assert!(leaf_field.is_null(1)); // leaf field is NULL
3060
3061        // Row 2: "outer" field missing → {outer: NULL}
3062        assert!(!outer_struct.is_null(2));
3063        assert!(inner_struct.is_null(2)); // outer field is NULL
3064
3065        // Row 3: Top-level NULL → struct-level NULL
3066        assert!(outer_struct.is_null(3));
3067    }
3068
3069    #[test]
3070    fn test_path_based_null_masks_one_step() {
3071        // Create nested variant with diverse null patterns
3072        let variant_array = create_comprehensive_nested_shredded_variant();
3073
3074        // Extract "outer" field using path-based variant_get
3075        let path = VariantPath::from("outer");
3076        let inner_field = Field::new("inner", DataType::Int32, true);
3077        let result_type = DataType::Struct(Fields::from(vec![inner_field]));
3078
3079        let options = GetOptions {
3080            path,
3081            as_type: Some(Arc::new(Field::new("result", result_type, true))),
3082            cast_options: CastOptions::default(),
3083        };
3084
3085        let result = variant_get(&variant_array, options).unwrap();
3086
3087        // Verify the result is a StructArray with "inner" field and 4 rows
3088        let outer_result = result.as_any().downcast_ref::<StructArray>().unwrap();
3089        assert_eq!(outer_result.len(), 4);
3090        assert_eq!(outer_result.num_columns(), 1);
3091
3092        // Get the "inner" field (Int32 values)
3093        let inner_field = outer_result
3094            .column(0)
3095            .as_any()
3096            .downcast_ref::<Int32Array>()
3097            .unwrap();
3098
3099        // Row 0: Normal nested {"outer": {"inner": 42}} → {"inner": 42}
3100        assert!(!outer_result.is_null(0));
3101        assert_eq!(inner_field.value(0), 42);
3102
3103        // Row 1: Inner field null {"outer": {"inner": null}} → {"inner": null}
3104        assert!(!outer_result.is_null(1));
3105        assert!(inner_field.is_null(1));
3106
3107        // Row 2: Outer field null {"outer": null} → null (entire struct is null)
3108        assert!(outer_result.is_null(2));
3109
3110        // Row 3: Top-level null → null (entire struct is null)
3111        assert!(outer_result.is_null(3));
3112    }
3113
3114    #[test]
3115    fn test_path_based_null_masks_two_steps() {
3116        // Create nested variant with diverse null patterns
3117        let variant_array = create_comprehensive_nested_shredded_variant();
3118
3119        // Extract "outer.inner" field using path-based variant_get
3120        let path = VariantPath::from("outer").join("inner");
3121
3122        let options = GetOptions {
3123            path,
3124            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
3125            cast_options: CastOptions::default(),
3126        };
3127
3128        let result = variant_get(&variant_array, options).unwrap();
3129
3130        // Verify the result is an Int32Array with 4 rows
3131        let int_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
3132        assert_eq!(int_result.len(), 4);
3133
3134        // Row 0: Normal nested {"outer": {"inner": 42}} → 42
3135        assert!(!int_result.is_null(0));
3136        assert_eq!(int_result.value(0), 42);
3137
3138        // Row 1: Inner field null {"outer": {"inner": null}} → null
3139        assert!(int_result.is_null(1));
3140
3141        // Row 2: Outer field null {"outer": null} → null (path traversal fails)
3142        assert!(int_result.is_null(2));
3143
3144        // Row 3: Top-level null → null (path traversal fails)
3145        assert!(int_result.is_null(3));
3146    }
3147
3148    #[test]
3149    fn test_struct_extraction_mixed_and_unshredded() {
3150        // Create a partially shredded variant (x shredded, y not)
3151        let variant_array = create_mixed_and_unshredded_variant();
3152
3153        // Request struct with both shredded and unshredded fields
3154        let struct_fields = Fields::from(vec![
3155            Field::new("x", DataType::Int32, true),
3156            Field::new("y", DataType::Int32, true),
3157        ]);
3158        let struct_type = DataType::Struct(struct_fields);
3159
3160        let options = GetOptions {
3161            path: VariantPath::default(),
3162            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
3163            cast_options: CastOptions::default(),
3164        };
3165
3166        let result = variant_get(&variant_array, options).unwrap();
3167
3168        // Verify the mixed shredding works (should succeed with current implementation)
3169        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
3170        assert_eq!(struct_result.len(), 4);
3171        assert_eq!(struct_result.num_columns(), 2);
3172
3173        let field_x = struct_result
3174            .column(0)
3175            .as_any()
3176            .downcast_ref::<Int32Array>()
3177            .unwrap();
3178        let field_y = struct_result
3179            .column(1)
3180            .as_any()
3181            .downcast_ref::<Int32Array>()
3182            .unwrap();
3183
3184        // Row 0: {"x": 1, "y": 42} - x from shredded, y from value field
3185        assert_eq!(field_x.value(0), 1);
3186        assert_eq!(field_y.value(0), 42);
3187
3188        // Row 1: {"x": 2} - x from shredded, y missing (perfect shredding)
3189        assert_eq!(field_x.value(1), 2);
3190        assert!(field_y.is_null(1));
3191
3192        // Row 2: {"x": 3, "y": null} - x from shredded, y explicitly null in value
3193        assert_eq!(field_x.value(2), 3);
3194        assert!(field_y.is_null(2));
3195
3196        // Row 3: top-level null - entire struct row should be null
3197        assert!(struct_result.is_null(3));
3198    }
3199
3200    /// Test that demonstrates the actual struct row builder gap
3201    /// This test should fail because it hits unshredded nested structs
3202    #[test]
3203    fn test_struct_row_builder_gap_demonstration() {
3204        // Create completely unshredded JSON variant (no typed_value at all)
3205        let json_strings = vec![
3206            r#"{"outer": {"inner": 42}}"#,
3207            r#"{"outer": {"inner": 100}}"#,
3208        ];
3209        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
3210        let variant_array = json_to_variant(&string_array).unwrap();
3211
3212        // Request nested struct - this should fail at the row builder level
3213        let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
3214        let inner_struct_type = DataType::Struct(inner_fields);
3215        let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]);
3216        let outer_struct_type = DataType::Struct(outer_fields);
3217
3218        let options = GetOptions {
3219            path: VariantPath::default(),
3220            as_type: Some(Arc::new(Field::new("result", outer_struct_type, true))),
3221            cast_options: CastOptions::default(),
3222        };
3223
3224        let variant_array_ref = ArrayRef::from(variant_array);
3225        let result = variant_get(&variant_array_ref, options);
3226
3227        // Should fail with NotYetImplemented when the row builder tries to handle struct type
3228        assert!(result.is_err());
3229        let error = result.unwrap_err();
3230        assert!(error.to_string().contains("Not yet implemented"));
3231    }
3232
3233    /// Create comprehensive shredded variant with diverse null patterns and empty objects
3234    /// Rows: normal values, top-level null, missing field a, missing field b, empty object
3235    fn create_comprehensive_shredded_variant() -> ArrayRef {
3236        let (metadata, _) = {
3237            let mut builder = parquet_variant::VariantBuilder::new();
3238            let obj = builder.new_object();
3239            obj.finish();
3240            builder.finish()
3241        };
3242
3243        // Create null buffer for top-level nulls
3244        let nulls = NullBuffer::from(vec![
3245            true,  // row 0: normal values
3246            false, // row 1: top-level null
3247            true,  // row 2: missing field a
3248            true,  // row 3: missing field b
3249            true,  // row 4: empty object
3250        ]);
3251
3252        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 5));
3253
3254        // Create shredded fields with different null patterns
3255        // Field "a": present in rows 0,3 (missing in rows 1,2,4)
3256        let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
3257        let a_field_struct = StructArrayBuilder::new()
3258            .with_field("typed_value", Arc::new(a_field_typed_value), true)
3259            .build();
3260        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct)
3261            .expect("should create ShreddedVariantFieldArray for a");
3262
3263        // Field "b": present in rows 0,2 (missing in rows 1,3,4)
3264        let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
3265        let b_field_struct = StructArrayBuilder::new()
3266            .with_field("typed_value", Arc::new(b_field_typed_value), true)
3267            .build();
3268        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct)
3269            .expect("should create ShreddedVariantFieldArray for b");
3270
3271        // Field "c": present in row 0 only (missing in all other rows)
3272        let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
3273        let c_field_struct = StructArrayBuilder::new()
3274            .with_field("typed_value", Arc::new(c_field_typed_value), true)
3275            .build();
3276        let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct)
3277            .expect("should create ShreddedVariantFieldArray for c");
3278
3279        // Create main typed_value struct
3280        let typed_value_fields = Fields::from(vec![
3281            Field::new("a", a_field_shredded.data_type().clone(), true),
3282            Field::new("b", b_field_shredded.data_type().clone(), true),
3283            Field::new("c", c_field_shredded.data_type().clone(), true),
3284        ]);
3285        let typed_value_struct = StructArray::try_new(
3286            typed_value_fields,
3287            vec![
3288                ArrayRef::from(a_field_shredded),
3289                ArrayRef::from(b_field_shredded),
3290                ArrayRef::from(c_field_shredded),
3291            ],
3292            None,
3293        )
3294        .unwrap();
3295
3296        // Build final VariantArray with top-level nulls
3297        let struct_array = StructArrayBuilder::new()
3298            .with_field("metadata", Arc::new(metadata_array), false)
3299            .with_field("typed_value", Arc::new(typed_value_struct), true)
3300            .with_nulls(nulls)
3301            .build();
3302
3303        Arc::new(struct_array)
3304    }
3305
3306    /// Create comprehensive nested shredded variant with diverse null patterns
3307    /// Represents 3-level structure: variant -> outer -> inner (INT value)
3308    /// The shredding schema is: {"metadata": BINARY, "typed_value": {"outer": {"typed_value": {"inner": {"typed_value": INT}}}}}
3309    /// Rows: normal nested value, inner field null, outer field null, top-level null
3310    fn create_comprehensive_nested_shredded_variant() -> ArrayRef {
3311        // Create the inner level: contains typed_value with Int32 values
3312        // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null
3313        let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); // dummy value for row 2
3314        let inner = StructArrayBuilder::new()
3315            .with_field("typed_value", Arc::new(inner_typed_value), true)
3316            .build();
3317        let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap();
3318
3319        let outer_typed_value_nulls = NullBuffer::from(vec![
3320            true,  // row 0: inner struct exists with typed_value=42
3321            false, // row 1: inner field NULL
3322            false, // row 2: outer field NULL
3323            false, // row 3: top-level NULL
3324        ]);
3325        let outer_typed_value = StructArrayBuilder::new()
3326            .with_field("inner", ArrayRef::from(inner), false)
3327            .with_nulls(outer_typed_value_nulls)
3328            .build();
3329
3330        let outer = StructArrayBuilder::new()
3331            .with_field("typed_value", Arc::new(outer_typed_value), true)
3332            .build();
3333        let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap();
3334
3335        let typed_value_nulls = NullBuffer::from(vec![
3336            true,  // row 0: inner struct exists with typed_value=42
3337            true,  // row 1: inner field NULL
3338            false, // row 2: outer field NULL
3339            false, // row 3: top-level NULL
3340        ]);
3341        let typed_value = StructArrayBuilder::new()
3342            .with_field("outer", ArrayRef::from(outer), false)
3343            .with_nulls(typed_value_nulls)
3344            .build();
3345
3346        // Build final VariantArray with top-level nulls
3347        let metadata_array =
3348            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
3349        let nulls = NullBuffer::from(vec![
3350            true,  // row 0: inner struct exists with typed_value=42
3351            true,  // row 1: inner field NULL
3352            true,  // row 2: outer field NULL
3353            false, // row 3: top-level NULL
3354        ]);
3355        let struct_array = StructArrayBuilder::new()
3356            .with_field("metadata", Arc::new(metadata_array), false)
3357            .with_field("typed_value", Arc::new(typed_value), true)
3358            .with_nulls(nulls)
3359            .build();
3360
3361        Arc::new(struct_array)
3362    }
3363
3364    /// Create variant with mixed shredding (spec-compliant) including null scenarios
3365    /// Field "x" is globally shredded, field "y" is never shredded
3366    fn create_mixed_and_unshredded_variant() -> ArrayRef {
3367        // Create spec-compliant mixed shredding:
3368        // - Field "x" is globally shredded (has typed_value column)
3369        // - Field "y" is never shredded (only appears in value field when present)
3370
3371        let (metadata, y_field_value) = {
3372            let mut builder = parquet_variant::VariantBuilder::new();
3373            let mut obj = builder.new_object();
3374            obj.insert("y", Variant::from(42));
3375            obj.finish();
3376            builder.finish()
3377        };
3378
3379        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
3380
3381        // Value field contains objects with unshredded fields only (never contains "x")
3382        // Row 0: {"y": "foo"} - x is shredded out, y remains in value
3383        // Row 1: {} - both x and y are absent (perfect shredding for x, y missing)
3384        // Row 2: {"y": null} - x is shredded out, y explicitly null
3385        // Row 3: top-level null (encoded in VariantArray's null mask, but fields contain valid data)
3386
3387        let empty_object_value = {
3388            let mut builder = parquet_variant::VariantBuilder::new();
3389            builder.new_object().finish();
3390            let (_, value) = builder.finish();
3391            value
3392        };
3393
3394        let y_null_value = {
3395            let mut builder = parquet_variant::VariantBuilder::new();
3396            builder.new_object().with_field("y", Variant::Null).finish();
3397            let (_, value) = builder.finish();
3398            value
3399        };
3400
3401        let value_array = BinaryViewArray::from(vec![
3402            Some(y_field_value.as_slice()),      // Row 0: {"y": 42}
3403            Some(empty_object_value.as_slice()), // Row 1: {}
3404            Some(y_null_value.as_slice()),       // Row 2: {"y": null}
3405            Some(empty_object_value.as_slice()), // Row 3: top-level null (but value field contains valid data)
3406        ]);
3407
3408        // Create shredded field "x" (globally shredded - never appears in value field)
3409        // For top-level null row, the field still needs valid content (not null)
3410        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]);
3411        let x_field_struct = StructArrayBuilder::new()
3412            .with_field("typed_value", Arc::new(x_field_typed_value), true)
3413            .build();
3414        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
3415            .expect("should create ShreddedVariantFieldArray for x");
3416
3417        // Create main typed_value struct (only contains shredded fields)
3418        let typed_value_struct = StructArrayBuilder::new()
3419            .with_field("x", ArrayRef::from(x_field_shredded), false)
3420            .build();
3421
3422        // Build VariantArray with both value and typed_value (PartiallyShredded)
3423        // Top-level null is encoded in the main StructArray's null mask
3424        let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null
3425        let struct_array = StructArrayBuilder::new()
3426            .with_field("metadata", Arc::new(metadata_array), false)
3427            .with_field("value", Arc::new(value_array), true)
3428            .with_field("typed_value", Arc::new(typed_value_struct), true)
3429            .with_nulls(variant_nulls)
3430            .build();
3431
3432        Arc::new(struct_array)
3433    }
3434
3435    #[test]
3436    fn get_decimal32_rescaled_to_scale2() {
3437        // Build unshredded variant values with different scales
3438        let mut builder = crate::VariantArrayBuilder::new(5);
3439        builder.append_variant(VariantDecimal4::try_new(1234, 2).unwrap().into()); // 12.34
3440        builder.append_variant(VariantDecimal4::try_new(1234, 3).unwrap().into()); // 1.234
3441        builder.append_variant(VariantDecimal4::try_new(1234, 0).unwrap().into()); // 1234
3442        builder.append_null();
3443        builder.append_variant(
3444            VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3)
3445                .unwrap()
3446                .into(),
3447        ); // should fit into Decimal32
3448        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3449
3450        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3451        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3452        let result = variant_get(&variant_array, options).unwrap();
3453        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3454
3455        assert_eq!(result.precision(), 9);
3456        assert_eq!(result.scale(), 2);
3457        assert_eq!(result.value(0), 1234);
3458        assert_eq!(result.value(1), 123);
3459        assert_eq!(result.value(2), 123400);
3460        assert!(result.is_null(3));
3461        assert_eq!(
3462            result.value(4),
3463            VariantDecimal4::MAX_UNSCALED_VALUE / 10 + 1
3464        ); // should not be null as the final result fits into Decimal32
3465    }
3466
3467    #[test]
3468    fn get_decimal32_scale_down_rounding() {
3469        let mut builder = crate::VariantArrayBuilder::new(7);
3470        builder.append_variant(VariantDecimal4::try_new(1235, 0).unwrap().into());
3471        builder.append_variant(VariantDecimal4::try_new(1245, 0).unwrap().into());
3472        builder.append_variant(VariantDecimal4::try_new(-1235, 0).unwrap().into());
3473        builder.append_variant(VariantDecimal4::try_new(-1245, 0).unwrap().into());
3474        builder.append_variant(VariantDecimal4::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3475        builder.append_variant(VariantDecimal4::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3476        builder.append_variant(VariantDecimal4::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3477        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3478
3479        let field = Field::new("result", DataType::Decimal32(9, -1), true);
3480        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3481        let result = variant_get(&variant_array, options).unwrap();
3482        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3483
3484        assert_eq!(result.precision(), 9);
3485        assert_eq!(result.scale(), -1);
3486        assert_eq!(result.value(0), 124);
3487        assert_eq!(result.value(1), 125);
3488        assert_eq!(result.value(2), -124);
3489        assert_eq!(result.value(3), -125);
3490        assert_eq!(result.value(4), 1);
3491        assert!(result.is_valid(5));
3492        assert_eq!(result.value(5), 0);
3493        assert_eq!(result.value(6), 1);
3494    }
3495
3496    #[test]
3497    fn get_decimal32_large_scale_reduction() {
3498        let mut builder = crate::VariantArrayBuilder::new(2);
3499        builder.append_variant(
3500            VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3501                .unwrap()
3502                .into(),
3503        );
3504        builder.append_variant(
3505            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3506                .unwrap()
3507                .into(),
3508        );
3509        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3510
3511        let field = Field::new("result", DataType::Decimal32(9, -9), true);
3512        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3513        let result = variant_get(&variant_array, options).unwrap();
3514        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3515
3516        assert_eq!(result.precision(), 9);
3517        assert_eq!(result.scale(), -9);
3518        assert_eq!(result.value(0), -1);
3519        assert_eq!(result.value(1), 1);
3520
3521        let field = Field::new("result", DataType::Decimal32(9, -10), true);
3522        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3523        let result = variant_get(&variant_array, options).unwrap();
3524        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3525
3526        assert_eq!(result.precision(), 9);
3527        assert_eq!(result.scale(), -10);
3528        assert!(result.is_valid(0));
3529        assert_eq!(result.value(0), 0);
3530        assert!(result.is_valid(1));
3531        assert_eq!(result.value(1), 0);
3532    }
3533
3534    #[test]
3535    fn get_decimal32_precision_overflow_safe() {
3536        // Exceed Decimal32 after scaling and rounding
3537        let mut builder = crate::VariantArrayBuilder::new(2);
3538        builder.append_variant(
3539            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3540                .unwrap()
3541                .into(),
3542        );
3543        builder.append_variant(
3544            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9)
3545                .unwrap()
3546                .into(),
3547        ); // integer value round up overflows
3548        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3549
3550        let field = Field::new("result", DataType::Decimal32(2, 2), true);
3551        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3552        let result = variant_get(&variant_array, options).unwrap();
3553        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3554
3555        assert!(result.is_null(0));
3556        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
3557    }
3558
3559    #[test]
3560    fn get_decimal32_precision_overflow_unsafe_errors() {
3561        let mut builder = crate::VariantArrayBuilder::new(1);
3562        builder.append_variant(
3563            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3564                .unwrap()
3565                .into(),
3566        );
3567        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3568
3569        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3570        let cast_options = CastOptions {
3571            safe: false,
3572            ..Default::default()
3573        };
3574        let options = GetOptions::new()
3575            .with_as_type(Some(FieldRef::from(field)))
3576            .with_cast_options(cast_options);
3577        let err = variant_get(&variant_array, options).unwrap_err();
3578
3579        assert!(
3580            err.to_string().contains(
3581                "Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4"
3582            )
3583        );
3584    }
3585
3586    #[test]
3587    fn get_decimal64_rescaled_to_scale2() {
3588        let mut builder = crate::VariantArrayBuilder::new(5);
3589        builder.append_variant(VariantDecimal8::try_new(1234, 2).unwrap().into()); // 12.34
3590        builder.append_variant(VariantDecimal8::try_new(1234, 3).unwrap().into()); // 1.234
3591        builder.append_variant(VariantDecimal8::try_new(1234, 0).unwrap().into()); // 1234
3592        builder.append_null();
3593        builder.append_variant(
3594            VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3)
3595                .unwrap()
3596                .into(),
3597        ); // should fit into Decimal64
3598        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3599
3600        let field = Field::new("result", DataType::Decimal64(18, 2), true);
3601        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3602        let result = variant_get(&variant_array, options).unwrap();
3603        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3604
3605        assert_eq!(result.precision(), 18);
3606        assert_eq!(result.scale(), 2);
3607        assert_eq!(result.value(0), 1234);
3608        assert_eq!(result.value(1), 123);
3609        assert_eq!(result.value(2), 123400);
3610        assert!(result.is_null(3));
3611        assert_eq!(
3612            result.value(4),
3613            VariantDecimal8::MAX_UNSCALED_VALUE / 10 + 1
3614        ); // should not be null as the final result fits into Decimal64
3615    }
3616
3617    #[test]
3618    fn get_decimal64_scale_down_rounding() {
3619        let mut builder = crate::VariantArrayBuilder::new(7);
3620        builder.append_variant(VariantDecimal8::try_new(1235, 0).unwrap().into());
3621        builder.append_variant(VariantDecimal8::try_new(1245, 0).unwrap().into());
3622        builder.append_variant(VariantDecimal8::try_new(-1235, 0).unwrap().into());
3623        builder.append_variant(VariantDecimal8::try_new(-1245, 0).unwrap().into());
3624        builder.append_variant(VariantDecimal8::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3625        builder.append_variant(VariantDecimal8::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3626        builder.append_variant(VariantDecimal8::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3627        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3628
3629        let field = Field::new("result", DataType::Decimal64(18, -1), true);
3630        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3631        let result = variant_get(&variant_array, options).unwrap();
3632        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3633
3634        assert_eq!(result.precision(), 18);
3635        assert_eq!(result.scale(), -1);
3636        assert_eq!(result.value(0), 124);
3637        assert_eq!(result.value(1), 125);
3638        assert_eq!(result.value(2), -124);
3639        assert_eq!(result.value(3), -125);
3640        assert_eq!(result.value(4), 1);
3641        assert!(result.is_valid(5));
3642        assert_eq!(result.value(5), 0);
3643        assert_eq!(result.value(6), 1);
3644    }
3645
3646    #[test]
3647    fn get_decimal64_large_scale_reduction() {
3648        let mut builder = crate::VariantArrayBuilder::new(2);
3649        builder.append_variant(
3650            VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3651                .unwrap()
3652                .into(),
3653        );
3654        builder.append_variant(
3655            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3656                .unwrap()
3657                .into(),
3658        );
3659        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3660
3661        let field = Field::new("result", DataType::Decimal64(18, -18), true);
3662        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3663        let result = variant_get(&variant_array, options).unwrap();
3664        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3665
3666        assert_eq!(result.precision(), 18);
3667        assert_eq!(result.scale(), -18);
3668        assert_eq!(result.value(0), -1);
3669        assert_eq!(result.value(1), 1);
3670
3671        let field = Field::new("result", DataType::Decimal64(18, -19), true);
3672        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3673        let result = variant_get(&variant_array, options).unwrap();
3674        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3675
3676        assert_eq!(result.precision(), 18);
3677        assert_eq!(result.scale(), -19);
3678        assert!(result.is_valid(0));
3679        assert_eq!(result.value(0), 0);
3680        assert!(result.is_valid(1));
3681        assert_eq!(result.value(1), 0);
3682    }
3683
3684    #[test]
3685    fn get_decimal64_precision_overflow_safe() {
3686        // Exceed Decimal64 after scaling and rounding
3687        let mut builder = crate::VariantArrayBuilder::new(2);
3688        builder.append_variant(
3689            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3690                .unwrap()
3691                .into(),
3692        );
3693        builder.append_variant(
3694            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18)
3695                .unwrap()
3696                .into(),
3697        ); // integer value round up overflows
3698        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3699
3700        let field = Field::new("result", DataType::Decimal64(2, 2), true);
3701        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3702        let result = variant_get(&variant_array, options).unwrap();
3703        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3704
3705        assert!(result.is_null(0));
3706        assert!(result.is_null(1));
3707    }
3708
3709    #[test]
3710    fn get_decimal64_precision_overflow_unsafe_errors() {
3711        let mut builder = crate::VariantArrayBuilder::new(1);
3712        builder.append_variant(
3713            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3714                .unwrap()
3715                .into(),
3716        );
3717        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3718
3719        let field = Field::new("result", DataType::Decimal64(18, 2), true);
3720        let cast_options = CastOptions {
3721            safe: false,
3722            ..Default::default()
3723        };
3724        let options = GetOptions::new()
3725            .with_as_type(Some(FieldRef::from(field)))
3726            .with_cast_options(cast_options);
3727        let err = variant_get(&variant_array, options).unwrap_err();
3728
3729        assert!(
3730            err.to_string().contains(
3731                "Failed to cast to Decimal64(precision=18, scale=2) from variant Decimal8"
3732            )
3733        );
3734    }
3735
3736    #[test]
3737    fn get_decimal128_rescaled_to_scale2() {
3738        let mut builder = crate::VariantArrayBuilder::new(4);
3739        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into());
3740        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into());
3741        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into());
3742        builder.append_null();
3743        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3744
3745        let field = Field::new("result", DataType::Decimal128(38, 2), true);
3746        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3747        let result = variant_get(&variant_array, options).unwrap();
3748        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3749
3750        assert_eq!(result.precision(), 38);
3751        assert_eq!(result.scale(), 2);
3752        assert_eq!(result.value(0), 1234);
3753        assert_eq!(result.value(1), 123);
3754        assert_eq!(result.value(2), 123400);
3755        assert!(result.is_null(3));
3756    }
3757
3758    #[test]
3759    fn get_decimal128_scale_down_rounding() {
3760        let mut builder = crate::VariantArrayBuilder::new(7);
3761        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
3762        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
3763        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
3764        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
3765        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3766        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3767        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3768        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3769
3770        let field = Field::new("result", DataType::Decimal128(38, -1), true);
3771        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3772        let result = variant_get(&variant_array, options).unwrap();
3773        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3774
3775        assert_eq!(result.precision(), 38);
3776        assert_eq!(result.scale(), -1);
3777        assert_eq!(result.value(0), 124);
3778        assert_eq!(result.value(1), 125);
3779        assert_eq!(result.value(2), -124);
3780        assert_eq!(result.value(3), -125);
3781        assert_eq!(result.value(4), 1);
3782        assert!(result.is_valid(5));
3783        assert_eq!(result.value(5), 0);
3784        assert_eq!(result.value(6), 1);
3785    }
3786
3787    #[test]
3788    fn get_decimal128_precision_overflow_safe() {
3789        // Exceed Decimal128 after scaling and rounding
3790        let mut builder = crate::VariantArrayBuilder::new(2);
3791        builder.append_variant(
3792            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3793                .unwrap()
3794                .into(),
3795        );
3796        builder.append_variant(
3797            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38)
3798                .unwrap()
3799                .into(),
3800        ); // integer value round up overflows
3801        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3802
3803        let field = Field::new("result", DataType::Decimal128(2, 2), true);
3804        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3805        let result = variant_get(&variant_array, options).unwrap();
3806        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3807
3808        assert!(result.is_null(0));
3809        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
3810    }
3811
3812    #[test]
3813    fn get_decimal128_precision_overflow_unsafe_errors() {
3814        let mut builder = crate::VariantArrayBuilder::new(1);
3815        builder.append_variant(
3816            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3817                .unwrap()
3818                .into(),
3819        );
3820        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3821
3822        let field = Field::new("result", DataType::Decimal128(38, 2), true);
3823        let cast_options = CastOptions {
3824            safe: false,
3825            ..Default::default()
3826        };
3827        let options = GetOptions::new()
3828            .with_as_type(Some(FieldRef::from(field)))
3829            .with_cast_options(cast_options);
3830        let err = variant_get(&variant_array, options).unwrap_err();
3831
3832        assert!(err.to_string().contains(
3833            "Failed to cast to Decimal128(precision=38, scale=2) from variant Decimal16"
3834        ));
3835    }
3836
3837    #[test]
3838    fn get_decimal256_rescaled_to_scale2() {
3839        // Build unshredded variant values with different scales using Decimal16 source
3840        let mut builder = crate::VariantArrayBuilder::new(4);
3841        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34
3842        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234
3843        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234
3844        builder.append_null();
3845        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3846
3847        let field = Field::new("result", DataType::Decimal256(76, 2), true);
3848        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3849        let result = variant_get(&variant_array, options).unwrap();
3850        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3851
3852        assert_eq!(result.precision(), 76);
3853        assert_eq!(result.scale(), 2);
3854        assert_eq!(result.value(0), i256::from_i128(1234));
3855        assert_eq!(result.value(1), i256::from_i128(123));
3856        assert_eq!(result.value(2), i256::from_i128(123400));
3857        assert!(result.is_null(3));
3858    }
3859
3860    #[test]
3861    fn get_decimal256_scale_down_rounding() {
3862        let mut builder = crate::VariantArrayBuilder::new(7);
3863        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
3864        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
3865        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
3866        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
3867        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3868        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3869        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3870        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3871
3872        let field = Field::new("result", DataType::Decimal256(76, -1), true);
3873        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3874        let result = variant_get(&variant_array, options).unwrap();
3875        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3876
3877        assert_eq!(result.precision(), 76);
3878        assert_eq!(result.scale(), -1);
3879        assert_eq!(result.value(0), i256::from_i128(124));
3880        assert_eq!(result.value(1), i256::from_i128(125));
3881        assert_eq!(result.value(2), i256::from_i128(-124));
3882        assert_eq!(result.value(3), i256::from_i128(-125));
3883        assert_eq!(result.value(4), i256::from_i128(1));
3884        assert!(result.is_valid(5));
3885        assert_eq!(result.value(5), i256::from_i128(0));
3886        assert_eq!(result.value(6), i256::from_i128(1));
3887    }
3888
3889    #[test]
3890    fn get_decimal256_precision_overflow_safe() {
3891        // Exceed Decimal128 max precision (38) after scaling
3892        let mut builder = crate::VariantArrayBuilder::new(2);
3893        builder.append_variant(
3894            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
3895                .unwrap()
3896                .into(),
3897        );
3898        builder.append_variant(
3899            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3900                .unwrap()
3901                .into(),
3902        );
3903        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3904
3905        let field = Field::new("result", DataType::Decimal256(76, 39), true);
3906        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3907        let result = variant_get(&variant_array, options).unwrap();
3908        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3909
3910        // Input is Decimal16 with integer = 10^38-1 and scale = 1, target scale = 39
3911        // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38
3912        let base = i256::from_i128(10);
3913        let factor = base.checked_pow(38).unwrap();
3914        let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE)
3915            .checked_mul(factor)
3916            .unwrap();
3917        assert_eq!(result.value(0), expected);
3918        assert!(result.is_null(1));
3919    }
3920
3921    #[test]
3922    fn get_decimal256_precision_overflow_unsafe_errors() {
3923        // Exceed Decimal128 max precision (38) after scaling
3924        let mut builder = crate::VariantArrayBuilder::new(2);
3925        builder.append_variant(
3926            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
3927                .unwrap()
3928                .into(),
3929        );
3930        builder.append_variant(
3931            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3932                .unwrap()
3933                .into(),
3934        );
3935        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3936
3937        let field = Field::new("result", DataType::Decimal256(76, 39), true);
3938        let cast_options = CastOptions {
3939            safe: false,
3940            ..Default::default()
3941        };
3942        let options = GetOptions::new()
3943            .with_as_type(Some(FieldRef::from(field)))
3944            .with_cast_options(cast_options);
3945        let err = variant_get(&variant_array, options).unwrap_err();
3946
3947        assert!(err.to_string().contains(
3948            "Failed to cast to Decimal256(precision=76, scale=39) from variant Decimal16"
3949        ));
3950    }
3951
3952    #[test]
3953    fn get_non_supported_temporal_types_error() {
3954        let values = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse)];
3955        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(values));
3956
3957        let test_cases = vec![
3958            FieldRef::from(Field::new(
3959                "result",
3960                DataType::Duration(TimeUnit::Microsecond),
3961                true,
3962            )),
3963            FieldRef::from(Field::new(
3964                "result",
3965                DataType::Interval(IntervalUnit::YearMonth),
3966                true,
3967            )),
3968        ];
3969
3970        for field in test_cases {
3971            let options = GetOptions::new().with_as_type(Some(field));
3972            let err = variant_get(&variant_array, options).unwrap_err();
3973            assert!(
3974                err.to_string()
3975                    .contains("Casting Variant to duration/interval types is not supported")
3976            );
3977        }
3978    }
3979
3980    fn invalid_time_variant_array() -> ArrayRef {
3981        let mut builder = VariantArrayBuilder::new(3);
3982        // 86401000000 is invalid for Time64Microsecond (max is 86400000000)
3983        builder.append_variant(Variant::Int64(86401000000));
3984        builder.append_variant(Variant::Int64(86401000000));
3985        builder.append_variant(Variant::Int64(86401000000));
3986        Arc::new(builder.build().into_inner())
3987    }
3988
3989    #[test]
3990    fn test_variant_get_error_when_cast_failure_and_safe_false() {
3991        let variant_array = invalid_time_variant_array();
3992
3993        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
3994        let cast_options = CastOptions {
3995            safe: false, // Will error on cast failure
3996            ..Default::default()
3997        };
3998        let options = GetOptions::new()
3999            .with_as_type(Some(FieldRef::from(field)))
4000            .with_cast_options(cast_options);
4001        let err = variant_get(&variant_array, options).unwrap_err();
4002        assert!(
4003            err.to_string().contains(
4004                "Cast error: Failed to extract primitive of type Time64(µs) from variant Int64(86401000000) at path VariantPath([])"
4005            ),
4006            "actual: {err}",
4007        );
4008    }
4009
4010    #[test]
4011    fn test_variant_get_return_null_when_cast_failure_and_safe_true() {
4012        let variant_array = invalid_time_variant_array();
4013
4014        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
4015        let cast_options = CastOptions {
4016            safe: true, // Will return null on cast failure
4017            ..Default::default()
4018        };
4019        let options = GetOptions::new()
4020            .with_as_type(Some(FieldRef::from(field)))
4021            .with_cast_options(cast_options);
4022        let result = variant_get(&variant_array, options).unwrap();
4023        assert_eq!(3, result.len());
4024
4025        for i in 0..3 {
4026            assert!(result.is_null(i));
4027        }
4028    }
4029
4030    #[test]
4031    fn test_perfect_shredding_returns_same_arc_ptr() {
4032        let variant_array = perfectly_shredded_int32_variant_array();
4033
4034        let variant_array_ref = VariantArray::try_new(&variant_array).unwrap();
4035        let typed_value_arc = variant_array_ref.typed_value_field().unwrap().clone();
4036
4037        let field = Field::new("result", DataType::Int32, true);
4038        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4039        let result = variant_get(&variant_array, options).unwrap();
4040
4041        assert!(Arc::ptr_eq(&typed_value_arc, &result));
4042    }
4043
4044    #[test]
4045    fn test_perfect_shredding_three_typed_value_columns() {
4046        // Column 1: perfectly shredded primitive with all nulls
4047        let all_nulls_values: Arc<Int32Array> = Arc::new(Int32Array::from(vec![
4048            Option::<i32>::None,
4049            Option::<i32>::None,
4050            Option::<i32>::None,
4051        ]));
4052        let all_nulls_erased: ArrayRef = all_nulls_values.clone();
4053        let all_nulls_field =
4054            ShreddedVariantFieldArray::from_parts(None, Some(all_nulls_erased.clone()), None);
4055        let all_nulls_type = all_nulls_field.data_type().clone();
4056        let all_nulls_struct: ArrayRef = ArrayRef::from(all_nulls_field);
4057
4058        // Column 2: perfectly shredded primitive with some nulls
4059        let some_nulls_values: Arc<Int32Array> =
4060            Arc::new(Int32Array::from(vec![Some(10), None, Some(30)]));
4061        let some_nulls_erased: ArrayRef = some_nulls_values.clone();
4062        let some_nulls_field =
4063            ShreddedVariantFieldArray::from_parts(None, Some(some_nulls_erased.clone()), None);
4064        let some_nulls_type = some_nulls_field.data_type().clone();
4065        let some_nulls_struct: ArrayRef = ArrayRef::from(some_nulls_field);
4066
4067        // Column 3: perfectly shredded nested struct
4068        let inner_values: Arc<Int32Array> =
4069            Arc::new(Int32Array::from(vec![Some(111), None, Some(333)]));
4070        let inner_erased: ArrayRef = inner_values.clone();
4071        let inner_field =
4072            ShreddedVariantFieldArray::from_parts(None, Some(inner_erased.clone()), None);
4073        let inner_field_type = inner_field.data_type().clone();
4074        let inner_struct_array: ArrayRef = ArrayRef::from(inner_field);
4075
4076        let nested_struct = Arc::new(
4077            StructArray::try_new(
4078                Fields::from(vec![Field::new("inner", inner_field_type, true)]),
4079                vec![inner_struct_array],
4080                None,
4081            )
4082            .unwrap(),
4083        );
4084        let nested_struct_erased: ArrayRef = nested_struct.clone();
4085        let struct_field =
4086            ShreddedVariantFieldArray::from_parts(None, Some(nested_struct_erased.clone()), None);
4087        let struct_field_type = struct_field.data_type().clone();
4088        let struct_field_struct: ArrayRef = ArrayRef::from(struct_field);
4089
4090        // Assemble the top-level typed_value struct with the three columns above
4091        let typed_value_struct = StructArray::try_new(
4092            Fields::from(vec![
4093                Field::new("all_nulls", all_nulls_type, true),
4094                Field::new("some_nulls", some_nulls_type, true),
4095                Field::new("struct_field", struct_field_type, true),
4096            ]),
4097            vec![all_nulls_struct, some_nulls_struct, struct_field_struct],
4098            None,
4099        )
4100        .unwrap();
4101
4102        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
4103            EMPTY_VARIANT_METADATA_BYTES,
4104            all_nulls_values.len(),
4105        ));
4106        let variant_struct = StructArrayBuilder::new()
4107            .with_field("metadata", Arc::new(metadata), false)
4108            .with_field("typed_value", Arc::new(typed_value_struct), true)
4109            .build();
4110        let variant_array: ArrayRef = VariantArray::try_new(&variant_struct).unwrap().into();
4111
4112        // Case 1: all-null primitive column should reuse the typed_value Arc directly
4113        let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
4114        let all_nulls_result = variant_get(
4115            &variant_array,
4116            GetOptions::new_with_path(VariantPath::from("all_nulls"))
4117                .with_as_type(Some(all_nulls_field_ref)),
4118        )
4119        .unwrap();
4120        assert!(Arc::ptr_eq(&all_nulls_result, &all_nulls_erased));
4121
4122        // Case 2: primitive column with some nulls should also reuse its typed_value Arc
4123        let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
4124        let some_nulls_result = variant_get(
4125            &variant_array,
4126            GetOptions::new_with_path(VariantPath::from("some_nulls"))
4127                .with_as_type(Some(some_nulls_field_ref)),
4128        )
4129        .unwrap();
4130        assert!(Arc::ptr_eq(&some_nulls_result, &some_nulls_erased));
4131
4132        // Case 3: struct column should return a StructArray composed from the nested field
4133        let struct_child_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
4134        let struct_field_ref = FieldRef::from(Field::new(
4135            "result",
4136            DataType::Struct(struct_child_fields.clone()),
4137            true,
4138        ));
4139        let struct_result = variant_get(
4140            &variant_array,
4141            GetOptions::new_with_path(VariantPath::from("struct_field"))
4142                .with_as_type(Some(struct_field_ref)),
4143        )
4144        .unwrap();
4145        let struct_array = struct_result
4146            .as_any()
4147            .downcast_ref::<StructArray>()
4148            .unwrap();
4149        assert_eq!(struct_array.len(), 3);
4150        assert_eq!(struct_array.null_count(), 0);
4151
4152        let inner_values_result = struct_array
4153            .column(0)
4154            .as_any()
4155            .downcast_ref::<Int32Array>()
4156            .unwrap();
4157        assert_eq!(inner_values_result.len(), 3);
4158        assert_eq!(inner_values_result.value(0), 111);
4159        assert!(inner_values_result.is_null(1));
4160        assert_eq!(inner_values_result.value(2), 333);
4161    }
4162
4163    #[test]
4164    fn test_variant_get_list_like_safe_cast() {
4165        let string_array: ArrayRef = Arc::new(StringArray::from(vec![
4166            r#"{"outer":{"list":[1, "two", 3]}}"#,
4167            r#"{"outer":{"list":"not a list"}}"#,
4168        ]));
4169        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4170
4171        let value_array: ArrayRef = {
4172            let mut builder = VariantBuilder::new();
4173            builder.append_value("two");
4174            let (_, value_bytes) = builder.finish();
4175            Arc::new(BinaryViewArray::from(vec![
4176                None,
4177                Some(value_bytes.as_slice()),
4178                None,
4179            ]))
4180        };
4181        let typed_value_array: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
4182        let struct_fields = Fields::from(vec![
4183            Field::new("value", DataType::BinaryView, true),
4184            Field::new("typed_value", DataType::Int64, true),
4185        ]);
4186        let struct_array: ArrayRef = Arc::new(
4187            StructArray::try_new(
4188                struct_fields.clone(),
4189                vec![value_array.clone(), typed_value_array.clone()],
4190                None,
4191            )
4192            .unwrap(),
4193        );
4194
4195        let request_field = Arc::new(Field::new("item", DataType::Int64, true));
4196        let result_field = Arc::new(Field::new("item", DataType::Struct(struct_fields), true));
4197
4198        let expectations = vec![
4199            (
4200                DataType::List(request_field.clone()),
4201                Arc::new(ListArray::new(
4202                    result_field.clone(),
4203                    OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
4204                    struct_array.clone(),
4205                    Some(NullBuffer::from(vec![true, false])),
4206                )) as ArrayRef,
4207            ),
4208            (
4209                DataType::LargeList(request_field.clone()),
4210                Arc::new(LargeListArray::new(
4211                    result_field.clone(),
4212                    OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
4213                    struct_array.clone(),
4214                    Some(NullBuffer::from(vec![true, false])),
4215                )) as ArrayRef,
4216            ),
4217            (
4218                DataType::ListView(request_field.clone()),
4219                Arc::new(ListViewArray::new(
4220                    result_field.clone(),
4221                    ScalarBuffer::from(vec![0, 3]),
4222                    ScalarBuffer::from(vec![3, 0]),
4223                    struct_array.clone(),
4224                    Some(NullBuffer::from(vec![true, false])),
4225                )) as ArrayRef,
4226            ),
4227            (
4228                DataType::LargeListView(request_field),
4229                Arc::new(LargeListViewArray::new(
4230                    result_field,
4231                    ScalarBuffer::from(vec![0, 3]),
4232                    ScalarBuffer::from(vec![3, 0]),
4233                    struct_array,
4234                    Some(NullBuffer::from(vec![true, false])),
4235                )) as ArrayRef,
4236            ),
4237        ];
4238
4239        for (request_type, expected) in expectations {
4240            let options = GetOptions::new_with_path(VariantPath::from("outer").join("list"))
4241                .with_as_type(Some(FieldRef::from(Field::new(
4242                    "result",
4243                    request_type.clone(),
4244                    true,
4245                ))));
4246
4247            let result = variant_get(&variant_array, options).unwrap();
4248            assert_eq!(result.data_type(), expected.data_type());
4249            assert_eq!(&result, &expected);
4250        }
4251
4252        for (idx, expected) in [
4253            (0, vec![Some(1), None]),
4254            (1, vec![None, None]),
4255            (2, vec![Some(3), None]),
4256        ] {
4257            let index_options =
4258                GetOptions::new_with_path(VariantPath::from("outer").join("list").join(idx))
4259                    .with_as_type(Some(FieldRef::from(Field::new(
4260                        "result",
4261                        DataType::Int64,
4262                        true,
4263                    ))));
4264            let index_result = variant_get(&variant_array, index_options).unwrap();
4265            let index_expected: ArrayRef = Arc::new(Int64Array::from(expected));
4266            assert_eq!(&index_result, &index_expected);
4267        }
4268    }
4269
4270    #[test]
4271    fn test_variant_get_list_like_unsafe_cast_errors_on_element_mismatch() {
4272        let string_array: ArrayRef =
4273            Arc::new(StringArray::from(vec![r#"[1, "two", 3]"#, "[4, 5]"]));
4274        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4275        let cast_options = CastOptions {
4276            safe: false,
4277            ..Default::default()
4278        };
4279
4280        let item_field = Arc::new(Field::new("item", DataType::Int64, true));
4281        let request_types = vec![
4282            DataType::List(item_field.clone()),
4283            DataType::LargeList(item_field.clone()),
4284            DataType::ListView(item_field.clone()),
4285            DataType::LargeListView(item_field),
4286        ];
4287
4288        for request_type in request_types {
4289            let options = GetOptions::new()
4290                .with_as_type(Some(FieldRef::from(Field::new(
4291                    "result",
4292                    request_type.clone(),
4293                    true,
4294                ))))
4295                .with_cast_options(cast_options.clone());
4296
4297            let err = variant_get(&variant_array, options).unwrap_err();
4298            assert!(
4299                err.to_string()
4300                    .contains("Failed to extract primitive of type Int64")
4301            );
4302        }
4303    }
4304
4305    #[test]
4306    fn test_variant_get_list_like_unsafe_cast_errors_on_non_list() {
4307        let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2]", "\"not a list\""]));
4308        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4309        let cast_options = CastOptions {
4310            safe: false,
4311            ..Default::default()
4312        };
4313        let item_field = Arc::new(Field::new("item", Int64, true));
4314        let data_types = vec![
4315            DataType::List(item_field.clone()),
4316            DataType::LargeList(item_field.clone()),
4317            DataType::ListView(item_field.clone()),
4318            DataType::LargeListView(item_field),
4319        ];
4320
4321        for data_type in data_types {
4322            let options = GetOptions::new()
4323                .with_as_type(Some(FieldRef::from(Field::new("result", data_type, true))))
4324                .with_cast_options(cast_options.clone());
4325
4326            let err = variant_get(&variant_array, options).unwrap_err();
4327            assert!(
4328                err.to_string()
4329                    .contains("Failed to extract list from variant"),
4330            );
4331        }
4332    }
4333
4334    #[test]
4335    fn test_variant_get_fixed_size_list_not_implemented() {
4336        let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2]", "\"not a list\""]));
4337        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4338        let item_field = Arc::new(Field::new("item", Int64, true));
4339        for safe in [true, false] {
4340            let options = GetOptions::new()
4341                .with_as_type(Some(FieldRef::from(Field::new(
4342                    "result",
4343                    DataType::FixedSizeList(item_field.clone(), 2),
4344                    true,
4345                ))))
4346                .with_cast_options(CastOptions {
4347                    safe,
4348                    ..Default::default()
4349                });
4350
4351            let err = variant_get(&variant_array, options).unwrap_err();
4352            assert!(
4353                err.to_string()
4354                    .contains("Converting unshredded variant arrays to arrow fixed-size lists")
4355            );
4356        }
4357    }
4358}