parquet_variant_compute/
variant_get.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use arrow::{
18    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
19    compute::CastOptions,
20    datatypes::Field,
21    error::Result,
22};
23use arrow_schema::{ArrowError, DataType, FieldRef};
24use parquet_variant::{VariantPath, VariantPathElement};
25
26use crate::VariantArray;
27use crate::variant_array::BorrowedShreddingState;
28use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
29
30use arrow::array::AsArray;
31use std::sync::Arc;
32
/// Outcome of trying to follow one path element through the shredded portion of a variant
/// column, as returned by `follow_shredded_path_element`.
pub(crate) enum ShreddedPathStep<'a> {
    /// Path step succeeded, return the new shredding state
    Success(BorrowedShreddingState<'a>),
    /// The path element is not present in the `typed_value` column and there is no `value` column,
    /// so we know it does not exist. It, and all paths under it, are all-NULL.
    Missing,
    /// The path element is not present in the `typed_value` column and must be retrieved from the `value`
    /// column instead. The caller should be prepared to handle any value, including the requested
    /// type, an arbitrary "wrong" type, or `Variant::Null`.
    NotShredded,
}
44
45/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
46/// deeper. For a `VariantPathElement::Field`, the step fails if there is no `typed_value` at this
47/// level, or if `typed_value` is not a struct, or if the requested field name does not exist.
48///
49/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible.
50pub(crate) fn follow_shredded_path_element<'a>(
51    shredding_state: &BorrowedShreddingState<'a>,
52    path_element: &VariantPathElement<'_>,
53    cast_options: &CastOptions,
54) -> Result<ShreddedPathStep<'a>> {
55    // If the requested path element is not present in `typed_value`, and `value` is missing, then
56    // we know it does not exist; it, and all paths under it, are all-NULL.
57    let missing_path_step = || match shredding_state.value_field() {
58        Some(_) => ShreddedPathStep::NotShredded,
59        None => ShreddedPathStep::Missing,
60    };
61
62    let Some(typed_value) = shredding_state.typed_value_field() else {
63        return Ok(missing_path_step());
64    };
65
66    match path_element {
67        VariantPathElement::Field { name } => {
68            // Try to step into the requested field name of a struct.
69            // First, try to downcast to StructArray
70            let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
71                // Downcast failure - if strict cast options are enabled, this should be an error
72                if !cast_options.safe {
73                    return Err(ArrowError::CastError(format!(
74                        "Cannot access field '{}' on non-struct type: {}",
75                        name,
76                        typed_value.data_type()
77                    )));
78                }
79                // With safe cast options, return NULL (missing_path_step)
80                return Ok(missing_path_step());
81            };
82
83            // Now try to find the column - missing column in a present struct is just missing data
84            let Some(field) = struct_array.column_by_name(name) else {
85                // Missing column in a present struct is just missing, not wrong - return Ok
86                return Ok(missing_path_step());
87            };
88
89            let struct_array = field.as_struct_opt().ok_or_else(|| {
90                // TODO: Should we blow up? Or just end the traversal and let the normal
91                // variant pathing code sort out the mess that it must anyway be
92                // prepared to handle?
93                ArrowError::InvalidArgumentError(format!(
94                    "Expected Struct array while following path, got {}",
95                    field.data_type(),
96                ))
97            })?;
98
99            let state = BorrowedShreddingState::try_from(struct_array)?;
100            Ok(ShreddedPathStep::Success(state))
101        }
102        VariantPathElement::Index { .. } => {
103            // TODO: Support array indexing. Among other things, it will require slicing not
104            // only the array we have here, but also the corresponding metadata and null masks.
105            Err(ArrowError::NotYetImplemented(
106                "Pathing into shredded variant array index".into(),
107            ))
108        }
109    }
110}
111
112/// Follows the given path as far as possible through shredded variant fields. If the path ends on a
113/// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path
114/// and extract the requested value on a per-row basis.
115fn shredded_get_path(
116    input: &VariantArray,
117    path: &[VariantPathElement<'_>],
118    as_field: Option<&Field>,
119    cast_options: &CastOptions,
120) -> Result<ArrayRef> {
121    // Helper that creates a new VariantArray from the given nested value and typed_value columns,
122    // properly accounting for accumulated nulls from path traversal
123    let make_target_variant =
124        |value: Option<BinaryViewArray>,
125         typed_value: Option<ArrayRef>,
126         accumulated_nulls: Option<arrow::buffer::NullBuffer>| {
127            let metadata = input.metadata_field().clone();
128            VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls)
129        };
130
131    // Helper that shreds a VariantArray to a specific type.
132    let shred_basic_variant =
133        |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
134            let as_type = as_field.map(|f| f.data_type());
135            let mut builder = make_variant_to_arrow_row_builder(
136                target.metadata_field(),
137                path,
138                as_type,
139                cast_options,
140                target.len(),
141            )?;
142            for i in 0..target.len() {
143                if target.is_null(i) {
144                    builder.append_null()?;
145                } else if !cast_options.safe {
146                    let value = target.try_value(i)?;
147                    builder.append_value(value)?;
148                } else {
149                    let _ = match target.try_value(i) {
150                        Ok(v) => builder.append_value(v)?,
151                        Err(_) => {
152                            builder.append_null()?;
153                            false // add this to make match arms have the same return type
154                        }
155                    };
156                }
157            }
158            builder.finish()
159        };
160
161    // Peel away the prefix of path elements that traverses the shredded parts of this variant
162    // column. Shredding will traverse the rest of the path on a per-row basis.
163    let mut shredding_state = input.shredding_state().borrow();
164    let mut accumulated_nulls = input.inner().nulls().cloned();
165    let mut path_index = 0;
166    for path_element in path {
167        match follow_shredded_path_element(&shredding_state, path_element, cast_options)? {
168            ShreddedPathStep::Success(state) => {
169                // Union nulls from the typed_value we just accessed
170                if let Some(typed_value) = shredding_state.typed_value_field() {
171                    accumulated_nulls = arrow::buffer::NullBuffer::union(
172                        accumulated_nulls.as_ref(),
173                        typed_value.nulls(),
174                    );
175                }
176                shredding_state = state;
177                path_index += 1;
178                continue;
179            }
180            ShreddedPathStep::Missing => {
181                let num_rows = input.len();
182                let arr = match as_field.map(|f| f.data_type()) {
183                    Some(data_type) => Arc::new(array::new_null_array(data_type, num_rows)) as _,
184                    None => Arc::new(array::NullArray::new(num_rows)) as _,
185                };
186                return Ok(arr);
187            }
188            ShreddedPathStep::NotShredded => {
189                let target = make_target_variant(
190                    shredding_state.value_field().cloned(),
191                    None,
192                    accumulated_nulls,
193                );
194                return shred_basic_variant(target, path[path_index..].into(), as_field);
195            }
196        };
197    }
198
199    // Path exhausted! Create a new `VariantArray` for the location we landed on.
200    let target = make_target_variant(
201        shredding_state.value_field().cloned(),
202        shredding_state.typed_value_field().cloned(),
203        accumulated_nulls,
204    );
205
206    // If our caller did not request any specific type, we can just return whatever we landed on.
207    let Some(as_field) = as_field else {
208        return Ok(ArrayRef::from(target));
209    };
210
211    // Structs are special. Recurse into each field separately, hoping to follow the shredding even
212    // further, and build up the final struct from those individually shredded results.
213    if let DataType::Struct(fields) = as_field.data_type() {
214        let children = fields
215            .iter()
216            .map(|field| {
217                shredded_get_path(
218                    &target,
219                    &[VariantPathElement::from(field.name().as_str())],
220                    Some(field),
221                    cast_options,
222                )
223            })
224            .collect::<Result<Vec<_>>>()?;
225
226        let struct_nulls = target.nulls().cloned();
227
228        return Ok(Arc::new(StructArray::try_new(
229            fields.clone(),
230            children,
231            struct_nulls,
232        )?));
233    }
234
235    // Not a struct, so directly shred the variant as the requested type
236    shred_basic_variant(target, VariantPath::default(), Some(as_field))
237}
238
239/// Returns an array with the specified path extracted from the variant values.
240///
241/// The return array type depends on the `as_type` field of the options parameter
242/// 1. `as_type: None`: a VariantArray is returned. The values in this new VariantArray will point
243///    to the specified path.
244/// 2. `as_type: Some(<specific field>)`: an array of the specified type is returned.
245///
246/// TODO: How would a caller request a struct or list type where the fields/elements can be any
247/// variant? Caller can pass None as the requested type to fetch a specific path, but it would
248/// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or
249/// list and then try to assemble the results.
250pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
251    let variant_array = VariantArray::try_new(input)?;
252
253    let GetOptions {
254        as_type,
255        path,
256        cast_options,
257    } = options;
258
259    shredded_get_path(&variant_array, &path, as_type.as_deref(), &cast_options)
260}
261
/// Controls the action of the variant_get kernel.
///
/// All fields are public and the type derives `Default`, so options can be built either with
/// the `with_*` methods on this type or with struct-update syntax.
#[derive(Debug, Clone, Default)]
pub struct GetOptions<'a> {
    /// What path to extract
    pub path: VariantPath<'a>,
    /// if `as_type` is None, the returned array will itself be a VariantArray.
    ///
    /// if `as_type` is `Some(type)` the field is returned as the specified type.
    pub as_type: Option<FieldRef>,
    /// Controls the casting behavior (e.g. error vs substituting null on cast error).
    pub cast_options: CastOptions<'a>,
}
274
275impl<'a> GetOptions<'a> {
276    /// Construct default options to get the specified path as a variant.
277    pub fn new() -> Self {
278        Default::default()
279    }
280
281    /// Construct options to get the specified path as a variant.
282    pub fn new_with_path(path: VariantPath<'a>) -> Self {
283        Self {
284            path,
285            as_type: None,
286            cast_options: Default::default(),
287        }
288    }
289
290    /// Specify the type to return.
291    pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
292        self.as_type = as_type;
293        self
294    }
295
296    /// Specify the cast options to use when casting to the specified type.
297    pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
298        self.cast_options = cast_options;
299        self
300    }
301}
302
303#[cfg(test)]
304mod test {
305    use std::str::FromStr;
306    use std::sync::Arc;
307
308    use super::{GetOptions, variant_get};
309    use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
310    use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
311    use arrow::array::{
312        Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
313        Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, Float32Array,
314        Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, LargeBinaryArray,
315        LargeStringArray, NullBuilder, StringArray, StringViewArray, StructArray,
316        Time64MicrosecondArray,
317    };
318    use arrow::buffer::NullBuffer;
319    use arrow::compute::CastOptions;
320    use arrow::datatypes::DataType::{Int16, Int32, Int64};
321    use arrow::datatypes::i256;
322    use arrow::util::display::FormatOptions;
323    use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
324    use arrow_schema::{DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit};
325    use chrono::DateTime;
326    use parquet_variant::{
327        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
328        VariantDecimalType, VariantPath,
329    };
330
331    fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
332        // Create input array from JSON string
333        let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
334        let input_variant_array_ref = ArrayRef::from(json_to_variant(&input_array_ref).unwrap());
335
336        let result =
337            variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
338
339        // Create expected array from JSON string
340        let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
341        let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();
342
343        let result_array = VariantArray::try_new(&result).unwrap();
344        assert_eq!(
345            result_array.len(),
346            1,
347            "Expected result array to have length 1"
348        );
349        assert!(
350            result_array.nulls().is_none(),
351            "Expected no nulls in result array"
352        );
353        let result_variant = result_array.value(0);
354        let expected_variant = expected_variant_array.value(0);
355        assert_eq!(
356            result_variant, expected_variant,
357            "Result variant does not match expected variant"
358        );
359    }
360
361    #[test]
362    fn get_primitive_variant_field() {
363        single_variant_get_test(
364            r#"{"some_field": 1234}"#,
365            VariantPath::from("some_field"),
366            "1234",
367        );
368    }
369
370    #[test]
371    fn get_primitive_variant_list_index() {
372        single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
373    }
374
375    #[test]
376    fn get_primitive_variant_inside_object_of_object() {
377        single_variant_get_test(
378            r#"{"top_level_field": {"inner_field": 1234}}"#,
379            VariantPath::from("top_level_field").join("inner_field"),
380            "1234",
381        );
382    }
383
384    #[test]
385    fn get_primitive_variant_inside_list_of_object() {
386        single_variant_get_test(
387            r#"[{"some_field": 1234}]"#,
388            VariantPath::from(0).join("some_field"),
389            "1234",
390        );
391    }
392
393    #[test]
394    fn get_primitive_variant_inside_object_of_list() {
395        single_variant_get_test(
396            r#"{"some_field": [1234]}"#,
397            VariantPath::from("some_field").join(0),
398            "1234",
399        );
400    }
401
402    #[test]
403    fn get_complex_variant() {
404        single_variant_get_test(
405            r#"{"top_level_field": {"inner_field": 1234}}"#,
406            VariantPath::from("top_level_field"),
407            r#"{"inner_field": 1234}"#,
408        );
409    }
410
411    /// Partial Shredding: extract a value as a VariantArray
412    macro_rules! numeric_partially_shredded_test {
413        ($primitive_type:ty, $data_fn:ident) => {
414            let array = $data_fn();
415            let options = GetOptions::new();
416            let result = variant_get(&array, options).unwrap();
417
418            // expect the result is a VariantArray
419            let result = VariantArray::try_new(&result).unwrap();
420            assert_eq!(result.len(), 4);
421
422            // Expect the values are the same as the original values
423            assert_eq!(
424                result.value(0),
425                Variant::from(<$primitive_type>::try_from(34u8).unwrap())
426            );
427            assert!(!result.is_valid(1));
428            assert_eq!(result.value(2), Variant::from("n/a"));
429            assert_eq!(
430                result.value(3),
431                Variant::from(<$primitive_type>::try_from(100u8).unwrap())
432            );
433        };
434    }
435
    /// Generates a function `$func_name() -> ArrayRef` that builds a 4-row partially shredded
    /// variant array. The `typed_value` column comes from calling `$typed_value_array_gen`;
    /// the `metadata`, `value` and null buffer are fixed: row 1 is null, row 2 carries the
    /// string "n/a" in `value`, and rows 0 and 3 have no `value`.
    macro_rules! partially_shredded_variant_array_gen {
        ($func_name:ident,  $typed_value_array_gen: expr) => {
            fn $func_name() -> ArrayRef {
                // Encode the single unshredded value ("n/a") and the metadata that goes with it
                let (metadata, string_value) = {
                    let mut builder = parquet_variant::VariantBuilder::new();
                    builder.append_value("n/a");
                    builder.finish()
                };

                let nulls = NullBuffer::from(vec![
                    true,  // row 0 non null
                    false, // row 1 is null
                    true,  // row 2 non null
                    true,  // row 3 non null
                ]);

                // metadata is the same for all rows
                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));

                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
                // about why row 1 is an empty, but non-null, value.
                let values = BinaryViewArray::from(vec![
                    None,                // row 0 is shredded, so no value
                    Some(b"" as &[u8]),  // row 1 is null, so empty value (see the link above)
                    Some(&string_value), // row 2 holds the encoded string value "n/a"
                    None,                // row 3 is shredded, so no value
                ]);

                let typed_value = $typed_value_array_gen();

                let struct_array = StructArrayBuilder::new()
                    .with_field("metadata", Arc::new(metadata), false)
                    .with_field("typed_value", Arc::new(typed_value), true)
                    .with_field("value", Arc::new(values), true)
                    .with_nulls(nulls)
                    .build();
                ArrayRef::from(
                    VariantArray::try_new(&struct_array).expect("should create variant array"),
                )
            }
        };
    }
478
    /// Runs `numeric_partially_shredded_test!` for `i8` against
    /// `partially_shredded_int8_variant_array`.
    #[test]
    fn get_variant_partially_shredded_int8_as_variant() {
        numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
    }
483
    /// Runs `numeric_partially_shredded_test!` for `i16` against
    /// `partially_shredded_int16_variant_array`.
    #[test]
    fn get_variant_partially_shredded_int16_as_variant() {
        numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
    }
488
    /// Runs `numeric_partially_shredded_test!` for `i32` against
    /// `partially_shredded_int32_variant_array`.
    #[test]
    fn get_variant_partially_shredded_int32_as_variant() {
        numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
    }
493
    /// Runs `numeric_partially_shredded_test!` for `i64` against
    /// `partially_shredded_int64_variant_array`.
    #[test]
    fn get_variant_partially_shredded_int64_as_variant() {
        numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
    }
498
    /// Runs `numeric_partially_shredded_test!` for `f32` against
    /// `partially_shredded_float32_variant_array`.
    #[test]
    fn get_variant_partially_shredded_float32_as_variant() {
        numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
    }
503
    /// Runs `numeric_partially_shredded_test!` for `f64` against
    /// `partially_shredded_float64_variant_array`.
    #[test]
    fn get_variant_partially_shredded_float64_as_variant() {
        numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
    }
508
509    #[test]
510    fn get_variant_partially_shredded_bool_as_variant() {
511        let array = partially_shredded_bool_variant_array();
512        let options = GetOptions::new();
513        let result = variant_get(&array, options).unwrap();
514
515        // expect the result is a VariantArray
516        let result = VariantArray::try_new(&result).unwrap();
517        assert_eq!(result.len(), 4);
518
519        // Expect the values are the same as the original values
520        assert_eq!(result.value(0), Variant::from(true));
521        assert!(!result.is_valid(1));
522        assert_eq!(result.value(2), Variant::from("n/a"));
523        assert_eq!(result.value(3), Variant::from(false));
524    }
525
526    #[test]
527    fn get_variant_partially_shredded_utf8_as_variant() {
528        let array = partially_shredded_utf8_variant_array();
529        let options = GetOptions::new();
530        let result = variant_get(&array, options).unwrap();
531
532        // expect the result is a VariantArray
533        let result = VariantArray::try_new(&result).unwrap();
534        assert_eq!(result.len(), 4);
535
536        // Expect the values are the same as the original values
537        assert_eq!(result.value(0), Variant::from("hello"));
538        assert!(!result.is_valid(1));
539        assert_eq!(result.value(2), Variant::from("n/a"));
540        assert_eq!(result.value(3), Variant::from("world"));
541    }
542
    // Defines `partially_shredded_binary_view_variant_array`, whose `typed_value` column is a
    // BinaryViewArray with values in rows 0 and 3 only.
    partially_shredded_variant_array_gen!(partially_shredded_binary_view_variant_array, || {
        BinaryViewArray::from(vec![
            Some(&[1u8, 2u8, 3u8][..]), // row 0 is shredded
            None,                       // row 1 is null
            None,                       // row 2 is a string
            Some(&[4u8, 5u8, 6u8][..]), // row 3 is shredded
        ])
    });
551
552    #[test]
553    fn get_variant_partially_shredded_date32_as_variant() {
554        let array = partially_shredded_date32_variant_array();
555        let options = GetOptions::new();
556        let result = variant_get(&array, options).unwrap();
557
558        // expect the result is a VariantArray
559        let result = VariantArray::try_new(&result).unwrap();
560        assert_eq!(result.len(), 4);
561
562        // Expect the values are the same as the original values
563        use chrono::NaiveDate;
564        let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap();
565        let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap();
566        assert_eq!(result.value(0), Variant::from(date1));
567        assert!(!result.is_valid(1));
568        assert_eq!(result.value(2), Variant::from("n/a"));
569        assert_eq!(result.value(3), Variant::from(date2));
570    }
571
572    #[test]
573    fn get_variant_partially_shredded_binary_view_as_variant() {
574        let array = partially_shredded_binary_view_variant_array();
575        let options = GetOptions::new();
576        let result = variant_get(&array, options).unwrap();
577
578        // expect the result is a VariantArray
579        let result = VariantArray::try_new(&result).unwrap();
580        assert_eq!(result.len(), 4);
581
582        // Expect the values are the same as the original values
583        assert_eq!(result.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
584        assert!(!result.is_valid(1));
585        assert_eq!(result.value(2), Variant::from("n/a"));
586        assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
587    }
588
589    /// Shredding: extract a value as an Int32Array
590    #[test]
591    fn get_variant_shredded_int32_as_int32_safe_cast() {
592        // Extract the typed value as Int32Array
593        let array = partially_shredded_int32_variant_array();
594        // specify we want the typed value as Int32
595        let field = Field::new("typed_value", DataType::Int32, true);
596        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
597        let result = variant_get(&array, options).unwrap();
598        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
599            Some(34),
600            None,
601            None, // "n/a" is not an Int32 so converted to null
602            Some(100),
603        ]));
604        assert_eq!(&result, &expected)
605    }
606
607    /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a")
608    #[test]
609    fn get_variant_shredded_int32_as_int32_unsafe_cast() {
610        // Extract the typed value as Int32Array
611        let array = partially_shredded_int32_variant_array();
612        let field = Field::new("typed_value", DataType::Int32, true);
613        let cast_options = CastOptions {
614            safe: false, // unsafe cast
615            ..Default::default()
616        };
617        let options = GetOptions::new()
618            .with_as_type(Some(FieldRef::from(field)))
619            .with_cast_options(cast_options);
620
621        let err = variant_get(&array, options).unwrap_err();
622        // TODO make this error message nicer (not Debug format)
623        assert_eq!(
624            err.to_string(),
625            "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
626        );
627    }
628
629    /// Perfect Shredding: extract the typed value as a VariantArray
630    macro_rules! numeric_perfectly_shredded_test {
631        ($primitive_type:ty, $data_fn:ident) => {
632            let array = $data_fn();
633            let options = GetOptions::new();
634            let result = variant_get(&array, options).unwrap();
635
636            // expect the result is a VariantArray
637            let result = VariantArray::try_new(&result).unwrap();
638            assert_eq!(result.len(), 3);
639
640            // Expect the values are the same as the original values
641            assert_eq!(
642                result.value(0),
643                Variant::from(<$primitive_type>::try_from(1u8).unwrap())
644            );
645            assert_eq!(
646                result.value(1),
647                Variant::from(<$primitive_type>::try_from(2u8).unwrap())
648            );
649            assert_eq!(
650                result.value(2),
651                Variant::from(<$primitive_type>::try_from(3u8).unwrap())
652            );
653        };
654    }
655
    /// Runs `numeric_perfectly_shredded_test!` for `i8` against
    /// `perfectly_shredded_int8_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_int8_as_variant() {
        numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
    }
660
    /// Runs `numeric_perfectly_shredded_test!` for `i16` against
    /// `perfectly_shredded_int16_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_int16_as_variant() {
        numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
    }
665
    /// Runs `numeric_perfectly_shredded_test!` for `i32` against
    /// `perfectly_shredded_int32_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_int32_as_variant() {
        numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
    }
670
    /// Runs `numeric_perfectly_shredded_test!` for `i64` against
    /// `perfectly_shredded_int64_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_int64_as_variant() {
        numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
    }
675
    /// Runs `numeric_perfectly_shredded_test!` for `f32` against
    /// `perfectly_shredded_float32_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_float32_as_variant() {
        numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
    }
680
    /// Runs `numeric_perfectly_shredded_test!` for `f64` against
    /// `perfectly_shredded_float64_variant_array`.
    #[test]
    fn get_variant_perfectly_shredded_float64_as_variant() {
        numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
    }
685
686    /// AllNull: extract a value as a VariantArray
687    #[test]
688    fn get_variant_all_null_as_variant() {
689        let array = all_null_variant_array();
690        let options = GetOptions::new();
691        let result = variant_get(&array, options).unwrap();
692
693        // expect the result is a VariantArray
694        let result = VariantArray::try_new(&result).unwrap();
695        assert_eq!(result.len(), 3);
696
697        // All values should be null
698        assert!(!result.is_valid(0));
699        assert!(!result.is_valid(1));
700        assert!(!result.is_valid(2));
701    }
702
703    /// AllNull: extract a value as an Int32Array
704    #[test]
705    fn get_variant_all_null_as_int32() {
706        let array = all_null_variant_array();
707        // specify we want the typed value as Int32
708        let field = Field::new("typed_value", DataType::Int32, true);
709        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
710        let result = variant_get(&array, options).unwrap();
711
712        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
713            Option::<i32>::None,
714            Option::<i32>::None,
715            Option::<i32>::None,
716        ]));
717        assert_eq!(&result, &expected)
718    }
719
720    macro_rules! perfectly_shredded_to_arrow_primitive_test {
721        ($name:ident, $primitive_type:expr, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
722            #[test]
723            fn $name() {
724                let array = $perfectly_shredded_array_gen_fun();
725                let field = Field::new("typed_value", $primitive_type, true);
726                let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
727                let result = variant_get(&array, options).unwrap();
728                let expected_array: ArrayRef = Arc::new($expected_array);
729                assert_eq!(&result, &expected_array);
730            }
731        };
732    }
733
734    perfectly_shredded_to_arrow_primitive_test!(
735        get_variant_perfectly_shredded_int18_as_int8,
736        Int8,
737        perfectly_shredded_int8_variant_array,
738        Int8Array::from(vec![Some(1), Some(2), Some(3)])
739    );
740
    // Same-type round trips: each shredded `typed_value` column is requested as its own Arrow
    // type, so the expected array repeats the values the generator function produced.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int16_as_int16,
        Int16,
        perfectly_shredded_int16_variant_array,
        Int16Array::from(vec![Some(1), Some(2), Some(3)])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int32_as_int32,
        Int32,
        perfectly_shredded_int32_variant_array,
        Int32Array::from(vec![Some(1), Some(2), Some(3)])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_int64_as_int64,
        Int64,
        perfectly_shredded_int64_variant_array,
        Int64Array::from(vec![Some(1), Some(2), Some(3)])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_float32_as_float32,
        Float32,
        perfectly_shredded_float32_variant_array,
        Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_float64_as_float64,
        Float64,
        perfectly_shredded_float64_variant_array,
        Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_boolean_as_boolean,
        Boolean,
        perfectly_shredded_bool_variant_array,
        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
    );
782
    // String columns: Utf8, LargeUtf8 and Utf8View inputs are all requested as `DataType::Utf8`,
    // so each case expects a plain `StringArray` with the same three strings.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_utf8_as_utf8,
        DataType::Utf8,
        perfectly_shredded_utf8_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_large_utf8_as_utf8,
        DataType::Utf8,
        perfectly_shredded_large_utf8_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );

    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_utf8_view_as_utf8,
        DataType::Utf8,
        perfectly_shredded_utf8_view_variant_array,
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    );
803
804    macro_rules! perfectly_shredded_variant_array_fn {
805        ($func:ident, $typed_value_gen:expr) => {
806            fn $func() -> ArrayRef {
807                // At the time of writing, the `VariantArrayBuilder` does not support shredding.
808                // so we must construct the array manually.  see https://github.com/apache/arrow-rs/issues/7895
809                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
810                    EMPTY_VARIANT_METADATA_BYTES,
811                    3,
812                ));
813                let typed_value = $typed_value_gen();
814
815                let struct_array = StructArrayBuilder::new()
816                    .with_field("metadata", Arc::new(metadata), false)
817                    .with_field("typed_value", Arc::new(typed_value), true)
818                    .build();
819
820                VariantArray::try_new(&struct_array)
821                    .expect("should create variant array")
822                    .into()
823            }
824        };
825    }
826
    // `typed_value` generators for the three Arrow string encodings (Utf8, LargeUtf8, Utf8View),
    // all holding the same strings, followed by a boolean generator.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_variant_array, || {
        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_utf8_variant_array, || {
        LargeStringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_view_variant_array, || {
        StringViewArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
    });

    perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
    });
842
843    /// Return a VariantArray that represents a perfectly "shredded" variant
844    /// for the given typed value.
845    ///
846    /// The schema of the corresponding `StructArray` would look like this:
847    ///
848    /// ```text
849    /// StructArray {
850    ///   metadata: BinaryViewArray,
851    ///   typed_value: Int32Array,
852    /// }
853    /// ```
854    macro_rules! numeric_perfectly_shredded_variant_array_fn {
855        ($func:ident, $array_type:ident, $primitive_type:ty) => {
856            perfectly_shredded_variant_array_fn!($func, || {
857                $array_type::from(vec![
858                    Some(<$primitive_type>::try_from(1u8).unwrap()),
859                    Some(<$primitive_type>::try_from(2u8).unwrap()),
860                    Some(<$primitive_type>::try_from(3u8).unwrap()),
861                ])
862            });
863        };
864    }
865
    // Instantiate the numeric generators: one function per (Arrow array type, native type) pair,
    // each producing a `typed_value` column holding 1, 2, 3.
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float64_variant_array,
        Float64Array,
        f64
    );
896
    // `typed_value` generator: microsecond timestamps without a time zone; the first value is
    // negative.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(-456000),
                Some(1758602096000001),
                Some(1758602096000002),
            ])
        }
    );

    // Same unit and no time zone requested: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_micro_ntz,
        DataType::Timestamp(TimeUnit::Microsecond, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
    );

    // Microsecond input requested as nanoseconds: each expected value is the input times 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        perfectly_shredded_timestamp_micro_ntz_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-456000000),
            Some(1758602096000001000),
            Some(1758602096000002000)
        ])
    );
930
    // `typed_value` generator: microsecond timestamps tagged with the "+00:00" time zone.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
        .with_timezone("+00:00")
    });

    // Same unit and time zone requested: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_micro,
        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array,
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            Some(1758602096000001),
            Some(1758602096000002),
        ])
        .with_timezone("+00:00")
    );

    // Microsecond input requested as nanoseconds (same time zone): each expected value is the
    // input times 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_micro_as_nano,
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_micro_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-456000000),
            Some(1758602096000001000),
            Some(1758602096000002000)
        ])
        .with_timezone("+00:00")
    );
964
    // `typed_value` generator: nanosecond timestamps without a time zone; the first value is
    // negative.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_timestamp_nano_ntz_variant_array,
        || {
            arrow::array::TimestampNanosecondArray::from(vec![
                Some(-4999999561),
                Some(1758602096000000001),
                Some(1758602096000000002),
            ])
        }
    );

    // Same unit and no time zone requested: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        perfectly_shredded_timestamp_nano_ntz_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
    );
986
    // `typed_value` generator: nanosecond timestamps tagged with the "+00:00" time zone.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_nano_variant_array, || {
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    });

    // Same unit and time zone requested: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_nano,
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_nano_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    );
1007
    // `typed_value` generator: a Date32 column that includes a negative value.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_date_variant_array, || {
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    });

    // Date32 requested as Date32: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_date_as_date,
        DataType::Date32,
        perfectly_shredded_date_variant_array,
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    );
1018
    // `typed_value` generator: a Time64 column with microsecond resolution.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    });

    // Time64(Microsecond) requested as the same type: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time,
        DataType::Time64(TimeUnit::Microsecond),
        perfectly_shredded_time_variant_array,
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    );
1029
    // `typed_value` generator: a three-row array built with `NullBuilder`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
        let mut builder = NullBuilder::new();
        builder.append_nulls(3);
        builder.finish()
    });

    // Requested as `DataType::Null`: a three-row `NullArray` is expected.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_as_null,
        DataType::Null,
        perfectly_shredded_null_variant_array,
        arrow::array::NullArray::new(3)
    );
1042
    // `typed_value` generator holding Int32 values; used to request `DataType::Null` from a
    // column whose stored type does not match.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array_with_int, || {
        Int32Array::from(vec![Some(32), Some(64), Some(48)])
    });

    // In safe mode a type mismatch produces null values instead of an error
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_with_type_missmatch_in_safe_mode,
        DataType::Null,
        perfectly_shredded_null_variant_array_with_int,
        arrow::array::NullArray::new(3)
    );
1054
1055    // We'll return an error if type miss match happens in strict mode
1056    #[test]
1057    fn get_variant_perfectly_shredded_null_as_null_with_type_missmatch_in_strict_mode() {
1058        let array = perfectly_shredded_null_variant_array_with_int();
1059        let field = Field::new("typed_value", DataType::Null, true);
1060        let options = GetOptions::new()
1061            .with_as_type(Some(FieldRef::from(field)))
1062            .with_cast_options(CastOptions {
1063                safe: false,
1064                format_options: FormatOptions::default(),
1065            });
1066
1067        let result = variant_get(&array, options);
1068
1069        assert!(result.is_err());
1070        let error_msg = format!("{}", result.unwrap_err());
1071        assert!(
1072            error_msg
1073                .contains("Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])"),
1074            "Expected=[Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])],\
1075                Got error message=[{}]",
1076            error_msg
1077        );
1078    }
1079
    // `typed_value` generator: a Decimal32 column with precision 5 and scale 2.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal4_variant_array, || {
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    });

    // Requested with the same type, precision and scale: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal4_as_decimal4,
        DataType::Decimal32(5, 2),
        perfectly_shredded_decimal4_variant_array,
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    );
1094
    // `typed_value` generator: a Decimal64 column with precision 6 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        || {
            Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
                .with_precision_and_scale(6, 1)
                .unwrap()
        }
    );

    // The input is cast to Decimal32 when it is transformed to Variant.
    // This test covers the path DataType::Decimal64 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal64 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_through_decimal32_as_decimal8,
        DataType::Decimal64(6, 1),
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
            .with_precision_and_scale(6, 1)
            .unwrap()
    );
1115
    // This test covers the path DataType::Decimal64 (the original array)
    //  -> Variant::Decimal8 (VariantArray) -> DataType::Decimal64 (the result array)
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal8_variant_array, || {
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    });

    // Requested with the same type, precision and scale: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_as_decimal8,
        DataType::Decimal64(10, 1),
        perfectly_shredded_decimal8_variant_array,
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );
1132
    // `typed_value` generator for the test below: a Decimal128 column with precision 7 and
    // scale 3, intended to exercise the path DataType::Decimal128 (the original array)
    //  -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        || {
            Decimal128Array::from(vec![
                Some(i128::from(1234589)),
                Some(i128::from(2344444)),
                Some(i128::from(-1234789)),
            ])
            .with_precision_and_scale(7, 3)
            .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within_decimal4_as_decimal16,
        DataType::Decimal128(7, 3),
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from(1234589)),
            Some(i128::from(2344444)),
            Some(i128::from(-1234789)),
        ])
        .with_precision_and_scale(7, 3)
        .unwrap()
    );
1162
    // `typed_value` generator: a Decimal128 column with precision 10 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        || {
            Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
                .with_precision_and_scale(10, 1)
                .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal8 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within8_as_decimal16,
        DataType::Decimal128(10, 1),
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );
1182
    // `typed_value` generator: a Decimal128 column with precision 20 and scale 3; the values are
    // parsed from strings of 20 digits.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal16_variant_array, || {
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap()),
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    });

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal16 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_as_decimal16,
        DataType::Decimal128(20, 3),
        perfectly_shredded_decimal16_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap())
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    );
1207
1208    macro_rules! assert_variant_get_as_variant_array_with_default_option {
1209        ($variant_array: expr, $array_expected: expr) => {{
1210            let options = GetOptions::new();
1211            let array = $variant_array;
1212            let result = variant_get(&array, options).unwrap();
1213
1214            // expect the result is a VariantArray
1215            let result = VariantArray::try_new(&result).unwrap();
1216
1217            assert_eq!(result.len(), $array_expected.len());
1218
1219            for (idx, item) in $array_expected.into_iter().enumerate() {
1220                match item {
1221                    Some(item) => assert_eq!(result.value(idx), item),
1222                    None => assert!(result.is_null(idx)),
1223                }
1224            }
1225        }};
1226    }
1227
    // Partially shredded generator: microsecond timestamps without a time zone. Rows 0 and 3
    // carry a typed value; rows 1 and 2 have none.
    partially_shredded_variant_array_gen!(
        partially_shredded_timestamp_micro_ntz_variant_array,
        || {
            arrow::array::TimestampMicrosecondArray::from(vec![
                Some(-456000),
                None,
                None,
                Some(1758602096000000),
            ])
        }
    );
1239
1240    #[test]
1241    fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
1242        let array = partially_shredded_timestamp_micro_ntz_variant_array();
1243        assert_variant_get_as_variant_array_with_default_option!(
1244            array,
1245            vec![
1246                Some(Variant::from(
1247                    DateTime::from_timestamp_micros(-456000i64)
1248                        .unwrap()
1249                        .naive_utc(),
1250                )),
1251                None,
1252                Some(Variant::from("n/a")),
1253                Some(Variant::from(
1254                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1255                        .unwrap()
1256                        .naive_utc(),
1257                )),
1258            ]
1259        )
1260    }
1261
    // Partially shredded generator: microsecond timestamps tagged with the "+00:00" time zone.
    // Rows 0 and 3 carry a typed value; rows 1 and 2 have none.
    partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            None,
            None,
            Some(1758602096000000),
        ])
        .with_timezone("+00:00")
    });
1271
1272    #[test]
1273    fn get_variant_partial_shredded_timestamp_micro_as_variant() {
1274        let array = partially_shredded_timestamp_micro_variant_array();
1275        assert_variant_get_as_variant_array_with_default_option!(
1276            array,
1277            vec![
1278                Some(Variant::from(
1279                    DateTime::from_timestamp_micros(-456000i64)
1280                        .unwrap()
1281                        .to_utc(),
1282                )),
1283                None,
1284                Some(Variant::from("n/a")),
1285                Some(Variant::from(
1286                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1287                        .unwrap()
1288                        .to_utc(),
1289                )),
1290            ]
1291        )
1292    }
1293
    // Partially shredded generator: nanosecond timestamps without a time zone. Rows 0 and 3
    // carry a typed value; rows 1 and 2 have none.
    partially_shredded_variant_array_gen!(
        partially_shredded_timestamp_nano_ntz_variant_array,
        || {
            arrow::array::TimestampNanosecondArray::from(vec![
                Some(-4999999561),
                None,
                None,
                Some(1758602096000000000),
            ])
        }
    );
1305
1306    #[test]
1307    fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
1308        let array = partially_shredded_timestamp_nano_ntz_variant_array();
1309
1310        assert_variant_get_as_variant_array_with_default_option!(
1311            array,
1312            vec![
1313                Some(Variant::from(
1314                    DateTime::from_timestamp(-5, 439).unwrap().naive_utc()
1315                )),
1316                None,
1317                Some(Variant::from("n/a")),
1318                Some(Variant::from(
1319                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1320                        .unwrap()
1321                        .naive_utc()
1322                )),
1323            ]
1324        )
1325    }
1326
    // Partially shredded generator: nanosecond timestamps tagged with the "+00:00" time zone.
    // Rows 0 and 3 carry a typed value; rows 1 and 2 have none.
    partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            None,
            None,
            Some(1758602096000000000),
        ])
        .with_timezone("+00:00")
    });
1336
1337    #[test]
1338    fn get_variant_partial_shredded_timestamp_nano_as_variant() {
1339        let array = partially_shredded_timestamp_nano_variant_array();
1340
1341        assert_variant_get_as_variant_array_with_default_option!(
1342            array,
1343            vec![
1344                Some(Variant::from(
1345                    DateTime::from_timestamp(-5, 439).unwrap().to_utc()
1346                )),
1347                None,
1348                Some(Variant::from("n/a")),
1349                Some(Variant::from(
1350                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1351                        .unwrap()
1352                        .to_utc()
1353                )),
1354            ]
1355        )
1356    }
1357
    // `typed_value` generator: a `BinaryArray` of three byte strings.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || {
        BinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // Binary requested as Binary: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_binary_as_binary,
        DataType::Binary,
        perfectly_shredded_binary_variant_array,
        BinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );
1376
    // `typed_value` generator: a `LargeBinaryArray` of three byte strings.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_binary_variant_array, || {
        LargeBinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // LargeBinary requested as LargeBinary: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_large_binary_as_large_binary,
        DataType::LargeBinary,
        perfectly_shredded_large_binary_variant_array,
        LargeBinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );
1395
    // `typed_value` generator: a `BinaryViewArray` of three byte strings.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_view_variant_array, || {
        BinaryViewArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // BinaryView requested as BinaryView: the values are expected back unchanged.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_binary_view_as_binary_view,
        DataType::BinaryView,
        perfectly_shredded_binary_view_variant_array,
        BinaryViewArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );
1414
1415    /// Return a VariantArray that represents a normal "shredded" variant
1416    /// for the following example
1417    ///
1418    /// Based on the example from [the doc]
1419    ///
1420    /// [the doc]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?tab=t.0
1421    ///
1422    /// ```text
1423    /// 34
1424    /// null (an Arrow NULL, not a Variant::Null)
1425    /// "n/a" (a string)
1426    /// 100
1427    /// ```
1428    ///
1429    /// The schema of the corresponding `StructArray` would look like this:
1430    ///
1431    /// ```text
1432    /// StructArray {
1433    ///   metadata: BinaryViewArray,
1434    ///   value: BinaryViewArray,
1435    ///   typed_value: Int32Array,
1436    /// }
1437    /// ```
1438    macro_rules! numeric_partially_shredded_variant_array_fn {
1439        ($func:ident, $array_type:ident, $primitive_type:ty) => {
1440            partially_shredded_variant_array_gen!($func, || $array_type::from(vec![
1441                Some(<$primitive_type>::try_from(34u8).unwrap()), // row 0 is shredded, so it has a value
1442                None,                                             // row 1 is null, so no value
1443                None, // row 2 is a string, so no typed value
1444                Some(<$primitive_type>::try_from(100u8).unwrap()), // row 3 is shredded, so it has a value
1445            ]));
1446        };
1447    }
1448
1449    macro_rules! partially_shredded_variant_array_gen {
1450        ($func:ident, $typed_array_gen: expr) => {
1451            fn $func() -> ArrayRef {
1452                // At the time of writing, the `VariantArrayBuilder` does not support shredding.
1453                // so we must construct the array manually.  see https://github.com/apache/arrow-rs/issues/7895
1454                let (metadata, string_value) = {
1455                    let mut builder = parquet_variant::VariantBuilder::new();
1456                    builder.append_value("n/a");
1457                    builder.finish()
1458                };
1459
1460                let nulls = NullBuffer::from(vec![
1461                    true,  // row 0 non null
1462                    false, // row 1 is null
1463                    true,  // row 2 non null
1464                    true,  // row 3 non null
1465                ]);
1466
1467                // metadata is the same for all rows
1468                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
1469
1470                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
1471                // about why row1 is an empty but non null, value.
1472                let values = BinaryViewArray::from(vec![
1473                    None,                // row 0 is shredded, so no value
1474                    Some(b"" as &[u8]),  // row 1 is null, so empty value (why?)
1475                    Some(&string_value), // copy the string value "N/A"
1476                    None,                // row 3 is shredded, so no value
1477                ]);
1478
1479                let typed_value = $typed_array_gen();
1480
1481                let struct_array = StructArrayBuilder::new()
1482                    .with_field("metadata", Arc::new(metadata), false)
1483                    .with_field("typed_value", Arc::new(typed_value), true)
1484                    .with_field("value", Arc::new(values), true)
1485                    .with_nulls(nulls)
1486                    .build();
1487
1488                ArrayRef::from(
1489                    VariantArray::try_new(&struct_array).expect("should create variant array"),
1490                )
1491            }
1492        };
1493    }
1494
    // Instantiate the partially shredded numeric generators: one function per
    // (Arrow array type, native type) pair, with typed values 34 and 100 in rows 0 and 3.
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float64_variant_array,
        Float64Array,
        f64
    );
1525
    // Partially shredded generator with a boolean `typed_value` column.
    partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || {
        arrow::array::BooleanArray::from(vec![
            Some(true),  // row 0 is shredded, so it has a value
            None,        // row 1 is null, so no value
            None,        // row 2 is a string, so no typed value
            Some(false), // row 3 is shredded, so it has a value
        ])
    });

    // Partially shredded generator with a Utf8 `typed_value` column.
    partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || {
        StringArray::from(vec![
            Some("hello"), // row 0 is shredded
            None,          // row 1 is null
            None,          // row 2 is a string
            Some("world"), // row 3 is shredded
        ])
    });

    // Partially shredded generator with a Date32 `typed_value` column.
    partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || {
        Date32Array::from(vec![
            Some(20348), // row 0 is shredded, 2025-09-17
            None,        // row 1 is null
            None,        // row 2 is a string, not a date
            Some(20340), // row 3 is shredded, 2025-09-09
        ])
    });
1552
1553    /// Return a VariantArray that represents an "all null" variant
1554    /// for the following example (3 null values):
1555    ///
1556    /// ```text
1557    /// null
1558    /// null
1559    /// null
1560    /// ```
1561    ///
1562    /// The schema of the corresponding `StructArray` would look like this:
1563    ///
1564    /// ```text
1565    /// StructArray {
1566    ///   metadata: BinaryViewArray,
1567    /// }
1568    /// ```
1569    fn all_null_variant_array() -> ArrayRef {
1570        let nulls = NullBuffer::from(vec![
1571            false, // row 0 is null
1572            false, // row 1 is null
1573            false, // row 2 is null
1574        ]);
1575
1576        // metadata is the same for all rows (though they're all null)
1577        let metadata =
1578            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
1579
1580        let struct_array = StructArrayBuilder::new()
1581            .with_field("metadata", Arc::new(metadata), false)
1582            .with_nulls(nulls)
1583            .build();
1584
1585        Arc::new(struct_array)
1586    }
1587    /// This test manually constructs a shredded variant array representing objects
1588    /// like {"x": 1, "y": "foo"} and {"x": 42} and tests extracting the "x" field
1589    /// as VariantArray using variant_get.
1590    #[test]
1591    fn test_shredded_object_field_access() {
1592        let array = shredded_object_with_x_field_variant_array();
1593
1594        // Test: Extract the "x" field as VariantArray first
1595        let options = GetOptions::new_with_path(VariantPath::from("x"));
1596        let result = variant_get(&array, options).unwrap();
1597
1598        let result_variant = VariantArray::try_new(&result).unwrap();
1599        assert_eq!(result_variant.len(), 2);
1600
1601        // Row 0: expect x=1
1602        assert_eq!(result_variant.value(0), Variant::Int32(1));
1603        // Row 1: expect x=42
1604        assert_eq!(result_variant.value(1), Variant::Int32(42));
1605    }
1606
1607    /// Test extracting shredded object field with type conversion
1608    #[test]
1609    fn test_shredded_object_field_as_int32() {
1610        let array = shredded_object_with_x_field_variant_array();
1611
1612        // Test: Extract the "x" field as Int32Array (type conversion)
1613        let field = Field::new("x", DataType::Int32, false);
1614        let options = GetOptions::new_with_path(VariantPath::from("x"))
1615            .with_as_type(Some(FieldRef::from(field)));
1616        let result = variant_get(&array, options).unwrap();
1617
1618        // Should get Int32Array
1619        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
1620        assert_eq!(&result, &expected);
1621    }
1622
1623    /// Helper function to create a shredded variant array representing objects
1624    ///
1625    /// This creates an array that represents:
1626    /// Row 0: {"x": 1, "y": "foo"}  (x is shredded, y is in value field)
1627    /// Row 1: {"x": 42}             (x is shredded, perfect shredding)
1628    ///
1629    /// The physical layout follows the shredding spec where:
1630    /// - metadata: contains object metadata
1631    /// - typed_value: StructArray with field "x" (ShreddedVariantFieldArray)
1632    /// - value: contains fallback for unshredded fields like {"y": "foo"}
1633    /// - The "x" field has typed_value=Int32Array and value=NULL (perfect shredding)
1634    fn shredded_object_with_x_field_variant_array() -> ArrayRef {
1635        // Create the base metadata for objects
1636        let (metadata, y_field_value) = {
1637            let mut builder = parquet_variant::VariantBuilder::new();
1638            let mut obj = builder.new_object();
1639            obj.insert("x", Variant::Int32(42));
1640            obj.insert("y", Variant::from("foo"));
1641            obj.finish();
1642            builder.finish()
1643        };
1644
1645        // Create metadata array (same for both rows)
1646        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
1647
1648        // Create the main value field per the 3-step shredding spec:
1649        // Step 2: If field not in shredding schema, check value field
1650        // Row 0: {"y": "foo"} (y is not shredded, stays in value for step 2)
1651        // Row 1: {} (empty object - no unshredded fields)
1652        let empty_object_value = {
1653            let mut builder = parquet_variant::VariantBuilder::new();
1654            let obj = builder.new_object();
1655            obj.finish();
1656            let (_, value) = builder.finish();
1657            value
1658        };
1659
1660        let value_array = BinaryViewArray::from(vec![
1661            Some(y_field_value.as_slice()),      // Row 0 has {"y": "foo"}
1662            Some(empty_object_value.as_slice()), // Row 1 has {}
1663        ]);
1664
1665        // Create the "x" field as a ShreddedVariantFieldArray
1666        // This represents the shredded Int32 values for the "x" field
1667        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
1668
1669        // For perfect shredding of the x field, no "value" column, only typed_value
1670        let x_field_struct = StructArrayBuilder::new()
1671            .with_field("typed_value", Arc::new(x_field_typed_value), true)
1672            .build();
1673
1674        // Wrap the x field struct in a ShreddedVariantFieldArray
1675        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1676            .expect("should create ShreddedVariantFieldArray");
1677
1678        // Create the main typed_value as a struct containing the "x" field
1679        let typed_value_fields = Fields::from(vec![Field::new(
1680            "x",
1681            x_field_shredded.data_type().clone(),
1682            true,
1683        )]);
1684        let typed_value_struct = StructArray::try_new(
1685            typed_value_fields,
1686            vec![ArrayRef::from(x_field_shredded)],
1687            None, // No nulls - both rows have the object structure
1688        )
1689        .unwrap();
1690
1691        // Create the main VariantArray
1692        let main_struct = StructArrayBuilder::new()
1693            .with_field("metadata", Arc::new(metadata_array), false)
1694            .with_field("value", Arc::new(value_array), true)
1695            .with_field("typed_value", Arc::new(typed_value_struct), true)
1696            .build();
1697
1698        Arc::new(main_struct)
1699    }
1700
1701    /// Simple test to check if nested paths are supported by current implementation
1702    #[test]
1703    fn test_simple_nested_path_support() {
1704        // Check: How does VariantPath parse different strings?
1705        println!("Testing path parsing:");
1706
1707        let path_x = VariantPath::from("x");
1708        let elements_x: Vec<_> = path_x.iter().collect();
1709        println!("  'x' -> {} elements: {:?}", elements_x.len(), elements_x);
1710
1711        let path_ax = VariantPath::from("a.x");
1712        let elements_ax: Vec<_> = path_ax.iter().collect();
1713        println!(
1714            "  'a.x' -> {} elements: {:?}",
1715            elements_ax.len(),
1716            elements_ax
1717        );
1718
1719        let path_ax_alt = VariantPath::from("$.a.x");
1720        let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect();
1721        println!(
1722            "  '$.a.x' -> {} elements: {:?}",
1723            elements_ax_alt.len(),
1724            elements_ax_alt
1725        );
1726
1727        let path_nested = VariantPath::from("a").join("x");
1728        let elements_nested: Vec<_> = path_nested.iter().collect();
1729        println!(
1730            "  VariantPath::from('a').join('x') -> {} elements: {:?}",
1731            elements_nested.len(),
1732            elements_nested
1733        );
1734
1735        // Use your existing simple test data but try "a.x" instead of "x"
1736        let array = shredded_object_with_x_field_variant_array();
1737
1738        // Test if variant_get with REAL nested path throws not implemented error
1739        let real_nested_path = VariantPath::from("a").join("x");
1740        let options = GetOptions::new_with_path(real_nested_path);
1741        let result = variant_get(&array, options);
1742
1743        match result {
1744            Ok(_) => {
1745                println!("Nested path 'a.x' works unexpectedly!");
1746            }
1747            Err(e) => {
1748                println!("Nested path 'a.x' error: {}", e);
1749                if e.to_string().contains("Not yet implemented")
1750                    || e.to_string().contains("NotYetImplemented")
1751                {
1752                    println!("This is expected - nested paths are not implemented");
1753                    return;
1754                }
1755                // Any other error is also expected for now
1756                println!("This shows nested paths need implementation");
1757            }
1758        }
1759    }
1760
1761    /// Test comprehensive variant_get scenarios with Int32 conversion
1762    /// Test depth 0: Direct field access "x" with Int32 conversion
1763    /// Covers shredded vs non-shredded VariantArrays for simple field access
1764    #[test]
1765    fn test_depth_0_int32_conversion() {
1766        println!("=== Testing Depth 0: Direct field access ===");
1767
1768        // Non-shredded test data: [{"x": 42}, {"x": "foo"}, {"y": 10}]
1769        let unshredded_array = create_depth_0_test_data();
1770
1771        let field = Field::new("result", DataType::Int32, true);
1772        let path = VariantPath::from("x");
1773        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1774        let result = variant_get(&unshredded_array, options).unwrap();
1775
1776        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1777            Some(42), // {"x": 42} -> 42
1778            None,     // {"x": "foo"} -> NULL (type mismatch)
1779            None,     // {"y": 10} -> NULL (field missing)
1780        ]));
1781        assert_eq!(&result, &expected);
1782        println!("Depth 0 (unshredded) passed");
1783
1784        // Shredded test data: using simplified approach based on working pattern
1785        let shredded_array = create_depth_0_shredded_test_data_simple();
1786
1787        let field = Field::new("result", DataType::Int32, true);
1788        let path = VariantPath::from("x");
1789        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1790        let result = variant_get(&shredded_array, options).unwrap();
1791
1792        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1793            Some(42), // {"x": 42} -> 42 (from typed_value)
1794            None,     // {"x": "foo"} -> NULL (type mismatch, from value field)
1795        ]));
1796        assert_eq!(&result, &expected);
1797        println!("Depth 0 (shredded) passed");
1798    }
1799
1800    /// Test depth 1: Single nested field access "a.x" with Int32 conversion
1801    /// Covers shredded vs non-shredded VariantArrays for nested field access
1802    #[test]
1803    fn test_depth_1_int32_conversion() {
1804        println!("=== Testing Depth 1: Single nested field access ===");
1805
1806        // Non-shredded test data from the GitHub issue
1807        let unshredded_array = create_nested_path_test_data();
1808
1809        let field = Field::new("result", DataType::Int32, true);
1810        let path = VariantPath::from("a.x"); // Dot notation!
1811        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1812        let result = variant_get(&unshredded_array, options).unwrap();
1813
1814        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1815            Some(55), // {"a": {"x": 55}} -> 55
1816            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch)
1817        ]));
1818        assert_eq!(&result, &expected);
1819        println!("Depth 1 (unshredded) passed");
1820
1821        // Shredded test data: depth 1 nested shredding
1822        let shredded_array = create_depth_1_shredded_test_data_working();
1823
1824        let field = Field::new("result", DataType::Int32, true);
1825        let path = VariantPath::from("a.x"); // Dot notation!
1826        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1827        let result = variant_get(&shredded_array, options).unwrap();
1828
1829        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1830            Some(55), // {"a": {"x": 55}} -> 55 (from nested shredded x)
1831            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch in nested value)
1832        ]));
1833        assert_eq!(&result, &expected);
1834        println!("Depth 1 (shredded) passed");
1835    }
1836
1837    /// Test depth 2: Double nested field access "a.b.x" with Int32 conversion  
1838    /// Covers shredded vs non-shredded VariantArrays for deeply nested field access
1839    #[test]
1840    fn test_depth_2_int32_conversion() {
1841        println!("=== Testing Depth 2: Double nested field access ===");
1842
1843        // Non-shredded test data: [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
1844        let unshredded_array = create_depth_2_test_data();
1845
1846        let field = Field::new("result", DataType::Int32, true);
1847        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
1848        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1849        let result = variant_get(&unshredded_array, options).unwrap();
1850
1851        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1852            Some(100), // {"a": {"b": {"x": 100}}} -> 100
1853            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch)
1854            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing)
1855        ]));
1856        assert_eq!(&result, &expected);
1857        println!("Depth 2 (unshredded) passed");
1858
1859        // Shredded test data: depth 2 nested shredding
1860        let shredded_array = create_depth_2_shredded_test_data_working();
1861
1862        let field = Field::new("result", DataType::Int32, true);
1863        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
1864        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
1865        let result = variant_get(&shredded_array, options).unwrap();
1866
1867        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1868            Some(100), // {"a": {"b": {"x": 100}}} -> 100 (from deeply nested shredded x)
1869            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch in deep value)
1870            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing in deep structure)
1871        ]));
1872        assert_eq!(&result, &expected);
1873        println!("Depth 2 (shredded) passed");
1874    }
1875
1876    /// Test that demonstrates what CURRENTLY WORKS
1877    ///
1878    /// This shows that nested path functionality does work, but only when the
1879    /// test data matches what the current implementation expects
1880    #[test]
1881    fn test_current_nested_path_functionality() {
1882        let array = shredded_object_with_x_field_variant_array();
1883
1884        // Test: Extract the "x" field (single level) - this works
1885        let single_path = VariantPath::from("x");
1886        let field = Field::new("result", DataType::Int32, true);
1887        let options =
1888            GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field)));
1889        let result = variant_get(&array, options).unwrap();
1890
1891        println!("Single path 'x' works - result: {:?}", result);
1892
1893        // Test: Try nested path "a.x" - this is what we need to implement
1894        let nested_path = VariantPath::from("a").join("x");
1895        let field = Field::new("result", DataType::Int32, true);
1896        let options =
1897            GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field)));
1898        let result = variant_get(&array, options).unwrap();
1899
1900        println!("Nested path 'a.x' result: {:?}", result);
1901    }
1902
1903    /// Create test data for depth 0 (direct field access)
1904    /// [{"x": 42}, {"x": "foo"}, {"y": 10}]
1905    fn create_depth_0_test_data() -> ArrayRef {
1906        let mut builder = crate::VariantArrayBuilder::new(3);
1907
1908        // Row 1: {"x": 42}
1909        {
1910            let json_str = r#"{"x": 42}"#;
1911            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1912            if let Ok(variant_array) = json_to_variant(&string_array) {
1913                builder.append_variant(variant_array.value(0));
1914            } else {
1915                builder.append_null();
1916            }
1917        }
1918
1919        // Row 2: {"x": "foo"}
1920        {
1921            let json_str = r#"{"x": "foo"}"#;
1922            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1923            if let Ok(variant_array) = json_to_variant(&string_array) {
1924                builder.append_variant(variant_array.value(0));
1925            } else {
1926                builder.append_null();
1927            }
1928        }
1929
1930        // Row 3: {"y": 10} (missing "x" field)
1931        {
1932            let json_str = r#"{"y": 10}"#;
1933            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1934            if let Ok(variant_array) = json_to_variant(&string_array) {
1935                builder.append_variant(variant_array.value(0));
1936            } else {
1937                builder.append_null();
1938            }
1939        }
1940
1941        ArrayRef::from(builder.build())
1942    }
1943
1944    /// Create test data for depth 1 (single nested field)
1945    /// This represents the exact scenarios from the GitHub issue: "a.x"
1946    fn create_nested_path_test_data() -> ArrayRef {
1947        let mut builder = crate::VariantArrayBuilder::new(2);
1948
1949        // Row 1: {"a": {"x": 55}, "b": 42}
1950        {
1951            let json_str = r#"{"a": {"x": 55}, "b": 42}"#;
1952            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1953            if let Ok(variant_array) = json_to_variant(&string_array) {
1954                builder.append_variant(variant_array.value(0));
1955            } else {
1956                builder.append_null();
1957            }
1958        }
1959
1960        // Row 2: {"a": {"x": "foo"}, "b": 42}
1961        {
1962            let json_str = r#"{"a": {"x": "foo"}, "b": 42}"#;
1963            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1964            if let Ok(variant_array) = json_to_variant(&string_array) {
1965                builder.append_variant(variant_array.value(0));
1966            } else {
1967                builder.append_null();
1968            }
1969        }
1970
1971        ArrayRef::from(builder.build())
1972    }
1973
1974    /// Create test data for depth 2 (double nested field)
1975    /// [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
1976    fn create_depth_2_test_data() -> ArrayRef {
1977        let mut builder = crate::VariantArrayBuilder::new(3);
1978
1979        // Row 1: {"a": {"b": {"x": 100}}}
1980        {
1981            let json_str = r#"{"a": {"b": {"x": 100}}}"#;
1982            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1983            if let Ok(variant_array) = json_to_variant(&string_array) {
1984                builder.append_variant(variant_array.value(0));
1985            } else {
1986                builder.append_null();
1987            }
1988        }
1989
1990        // Row 2: {"a": {"b": {"x": "bar"}}}
1991        {
1992            let json_str = r#"{"a": {"b": {"x": "bar"}}}"#;
1993            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1994            if let Ok(variant_array) = json_to_variant(&string_array) {
1995                builder.append_variant(variant_array.value(0));
1996            } else {
1997                builder.append_null();
1998            }
1999        }
2000
2001        // Row 3: {"a": {"b": {"y": 200}}} (missing "x" field)
2002        {
2003            let json_str = r#"{"a": {"b": {"y": 200}}}"#;
2004            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2005            if let Ok(variant_array) = json_to_variant(&string_array) {
2006                builder.append_variant(variant_array.value(0));
2007            } else {
2008                builder.append_null();
2009            }
2010        }
2011
2012        ArrayRef::from(builder.build())
2013    }
2014
2015    /// Create simple shredded test data for depth 0 using a simplified working pattern
2016    /// Creates 2 rows: [{"x": 42}, {"x": "foo"}] with "x" shredded where possible
2017    fn create_depth_0_shredded_test_data_simple() -> ArrayRef {
2018        // Create base metadata using the working pattern
2019        let (metadata, string_x_value) = {
2020            let mut builder = parquet_variant::VariantBuilder::new();
2021            let mut obj = builder.new_object();
2022            obj.insert("x", Variant::from("foo"));
2023            obj.finish();
2024            builder.finish()
2025        };
2026
2027        // Metadata array (same for both rows)
2028        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2029
2030        // Value array following the 3-step shredding spec:
2031        // Row 0: {} (x is shredded, no unshredded fields)
2032        // Row 1: {"x": "foo"} (x is a string, can't be shredded to Int32)
2033        let empty_object_value = {
2034            let mut builder = parquet_variant::VariantBuilder::new();
2035            let obj = builder.new_object();
2036            obj.finish();
2037            let (_, value) = builder.finish();
2038            value
2039        };
2040
2041        let value_array = BinaryViewArray::from(vec![
2042            Some(empty_object_value.as_slice()), // Row 0: {} (x shredded out)
2043            Some(string_x_value.as_slice()),     // Row 1: {"x": "foo"} (fallback)
2044        ]);
2045
2046        // Create the "x" field as a ShreddedVariantFieldArray
2047        let x_field_typed_value = Int32Array::from(vec![Some(42), None]);
2048
2049        // For the x field, only typed_value (perfect shredding when possible)
2050        let x_field_struct = StructArrayBuilder::new()
2051            .with_field("typed_value", Arc::new(x_field_typed_value), true)
2052            .build();
2053
2054        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2055            .expect("should create ShreddedVariantFieldArray");
2056
2057        // Create the main typed_value as a struct containing the "x" field
2058        let typed_value_fields = Fields::from(vec![Field::new(
2059            "x",
2060            x_field_shredded.data_type().clone(),
2061            true,
2062        )]);
2063        let typed_value_struct = StructArray::try_new(
2064            typed_value_fields,
2065            vec![ArrayRef::from(x_field_shredded)],
2066            None,
2067        )
2068        .unwrap();
2069
2070        // Build final VariantArray
2071        let struct_array = StructArrayBuilder::new()
2072            .with_field("metadata", Arc::new(metadata_array), false)
2073            .with_field("value", Arc::new(value_array), true)
2074            .with_field("typed_value", Arc::new(typed_value_struct), true)
2075            .build();
2076
2077        Arc::new(struct_array)
2078    }
2079
2080    /// Create working depth 1 shredded test data based on the existing working pattern
2081    /// This creates a properly structured shredded variant for "a.x" where:
2082    /// - Row 0: {"a": {"x": 55}, "b": 42} with a.x shredded into typed_value
2083    /// - Row 1: {"a": {"x": "foo"}, "b": 42} with a.x fallback to value field due to type mismatch
2084    fn create_depth_1_shredded_test_data_working() -> ArrayRef {
2085        // Create metadata following the working pattern from shredded_object_with_x_field_variant_array
2086        let (metadata, _) = {
2087            // Create nested structure: {"a": {"x": 55}, "b": 42}
2088            let mut builder = parquet_variant::VariantBuilder::new();
2089            let mut obj = builder.new_object();
2090
2091            // Create the nested "a" object
2092            let mut a_obj = obj.new_object("a");
2093            a_obj.insert("x", Variant::Int32(55));
2094            a_obj.finish();
2095
2096            obj.insert("b", Variant::Int32(42));
2097            obj.finish();
2098            builder.finish()
2099        };
2100
2101        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2102
2103        // Create value arrays for the fallback case
2104        // Following the spec: if field cannot be shredded, it stays in value
2105        let empty_object_value = {
2106            let mut builder = parquet_variant::VariantBuilder::new();
2107            let obj = builder.new_object();
2108            obj.finish();
2109            let (_, value) = builder.finish();
2110            value
2111        };
2112
2113        // Row 1 fallback: use the working pattern from the existing shredded test
2114        // This avoids metadata issues by using the simple fallback approach
2115        let row1_fallback = {
2116            let mut builder = parquet_variant::VariantBuilder::new();
2117            let mut obj = builder.new_object();
2118            obj.insert("fallback", Variant::from("data"));
2119            obj.finish();
2120            let (_, value) = builder.finish();
2121            value
2122        };
2123
2124        let value_array = BinaryViewArray::from(vec![
2125            Some(empty_object_value.as_slice()), // Row 0: {} (everything shredded except b in unshredded fields)
2126            Some(row1_fallback.as_slice()), // Row 1: {"a": {"x": "foo"}, "b": 42} (a.x can't be shredded)
2127        ]);
2128
2129        // Create the nested shredded structure
2130        // Level 2: x field (the deepest level)
2131        let x_typed_value = Int32Array::from(vec![Some(55), None]);
2132        let x_field_struct = StructArrayBuilder::new()
2133            .with_field("typed_value", Arc::new(x_typed_value), true)
2134            .build();
2135        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2136            .expect("should create ShreddedVariantFieldArray for x");
2137
2138        // Level 1: a field containing x field + value field for fallbacks
2139        // The "a" field needs both typed_value (for shredded x) and value (for fallback cases)
2140
2141        // Create the value field for "a" (for cases where a.x can't be shredded)
2142        let a_value_data = {
2143            let mut builder = parquet_variant::VariantBuilder::new();
2144            let obj = builder.new_object();
2145            obj.finish();
2146            let (_, value) = builder.finish();
2147            value
2148        };
2149        let a_value_array = BinaryViewArray::from(vec![
2150            None,                          // Row 0: x is shredded, so no value fallback needed
2151            Some(a_value_data.as_slice()), // Row 1: fallback for a.x="foo" (but logic will check typed_value first)
2152        ]);
2153
2154        let a_inner_fields = Fields::from(vec![Field::new(
2155            "x",
2156            x_field_shredded.data_type().clone(),
2157            true,
2158        )]);
2159        let a_inner_struct = StructArrayBuilder::new()
2160            .with_field(
2161                "typed_value",
2162                Arc::new(
2163                    StructArray::try_new(
2164                        a_inner_fields,
2165                        vec![ArrayRef::from(x_field_shredded)],
2166                        None,
2167                    )
2168                    .unwrap(),
2169                ),
2170                true,
2171            )
2172            .with_field("value", Arc::new(a_value_array), true)
2173            .build();
2174        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
2175            .expect("should create ShreddedVariantFieldArray for a");
2176
2177        // Level 0: main typed_value struct containing a field
2178        let typed_value_fields = Fields::from(vec![Field::new(
2179            "a",
2180            a_field_shredded.data_type().clone(),
2181            true,
2182        )]);
2183        let typed_value_struct = StructArray::try_new(
2184            typed_value_fields,
2185            vec![ArrayRef::from(a_field_shredded)],
2186            None,
2187        )
2188        .unwrap();
2189
2190        // Build final VariantArray
2191        let struct_array = StructArrayBuilder::new()
2192            .with_field("metadata", Arc::new(metadata_array), false)
2193            .with_field("value", Arc::new(value_array), true)
2194            .with_field("typed_value", Arc::new(typed_value_struct), true)
2195            .build();
2196
2197        Arc::new(struct_array)
2198    }
2199
2200    /// Create working depth 2 shredded test data for "a.b.x" paths
2201    /// This creates a 3-level nested shredded structure where:
2202    /// - Row 0: {"a": {"b": {"x": 100}}} with a.b.x shredded into typed_value
2203    /// - Row 1: {"a": {"b": {"x": "bar"}}} with type mismatch fallback
2204    /// - Row 2: {"a": {"b": {"y": 200}}} with missing field fallback
2205    fn create_depth_2_shredded_test_data_working() -> ArrayRef {
2206        // Create metadata following the working pattern
2207        let (metadata, _) = {
2208            // Create deeply nested structure: {"a": {"b": {"x": 100}}}
2209            let mut builder = parquet_variant::VariantBuilder::new();
2210            let mut obj = builder.new_object();
2211
2212            // Create the nested "a.b" structure
2213            let mut a_obj = obj.new_object("a");
2214            let mut b_obj = a_obj.new_object("b");
2215            b_obj.insert("x", Variant::Int32(100));
2216            b_obj.finish();
2217            a_obj.finish();
2218
2219            obj.finish();
2220            builder.finish()
2221        };
2222
2223        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
2224
2225        // Create value arrays for fallback cases
2226        let empty_object_value = {
2227            let mut builder = parquet_variant::VariantBuilder::new();
2228            let obj = builder.new_object();
2229            obj.finish();
2230            let (_, value) = builder.finish();
2231            value
2232        };
2233
2234        // Simple fallback values - avoiding complex nested metadata
2235        let value_array = BinaryViewArray::from(vec![
2236            Some(empty_object_value.as_slice()), // Row 0: fully shredded
2237            Some(empty_object_value.as_slice()), // Row 1: fallback (simplified)
2238            Some(empty_object_value.as_slice()), // Row 2: fallback (simplified)
2239        ]);
2240
2241        // Create the deeply nested shredded structure: a.b.x
2242
2243        // Level 3: x field (deepest level)
2244        let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
2245        let x_field_struct = StructArrayBuilder::new()
2246            .with_field("typed_value", Arc::new(x_typed_value), true)
2247            .build();
2248        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2249            .expect("should create ShreddedVariantFieldArray for x");
2250
2251        // Level 2: b field containing x field + value field
2252        let b_value_data = {
2253            let mut builder = parquet_variant::VariantBuilder::new();
2254            let obj = builder.new_object();
2255            obj.finish();
2256            let (_, value) = builder.finish();
2257            value
2258        };
2259        let b_value_array = BinaryViewArray::from(vec![
2260            None,                          // Row 0: x is shredded
2261            Some(b_value_data.as_slice()), // Row 1: fallback for b.x="bar"
2262            Some(b_value_data.as_slice()), // Row 2: fallback for b.y=200
2263        ]);
2264
2265        let b_inner_fields = Fields::from(vec![Field::new(
2266            "x",
2267            x_field_shredded.data_type().clone(),
2268            true,
2269        )]);
2270        let b_inner_struct = StructArrayBuilder::new()
2271            .with_field(
2272                "typed_value",
2273                Arc::new(
2274                    StructArray::try_new(
2275                        b_inner_fields,
2276                        vec![ArrayRef::from(x_field_shredded)],
2277                        None,
2278                    )
2279                    .unwrap(),
2280                ),
2281                true,
2282            )
2283            .with_field("value", Arc::new(b_value_array), true)
2284            .build();
2285        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct)
2286            .expect("should create ShreddedVariantFieldArray for b");
2287
2288        // Level 1: a field containing b field + value field
2289        let a_value_data = {
2290            let mut builder = parquet_variant::VariantBuilder::new();
2291            let obj = builder.new_object();
2292            obj.finish();
2293            let (_, value) = builder.finish();
2294            value
2295        };
2296        let a_value_array = BinaryViewArray::from(vec![
2297            None,                          // Row 0: b is shredded
2298            Some(a_value_data.as_slice()), // Row 1: fallback for a.b.*
2299            Some(a_value_data.as_slice()), // Row 2: fallback for a.b.*
2300        ]);
2301
2302        let a_inner_fields = Fields::from(vec![Field::new(
2303            "b",
2304            b_field_shredded.data_type().clone(),
2305            true,
2306        )]);
2307        let a_inner_struct = StructArrayBuilder::new()
2308            .with_field(
2309                "typed_value",
2310                Arc::new(
2311                    StructArray::try_new(
2312                        a_inner_fields,
2313                        vec![ArrayRef::from(b_field_shredded)],
2314                        None,
2315                    )
2316                    .unwrap(),
2317                ),
2318                true,
2319            )
2320            .with_field("value", Arc::new(a_value_array), true)
2321            .build();
2322        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
2323            .expect("should create ShreddedVariantFieldArray for a");
2324
2325        // Level 0: main typed_value struct containing a field
2326        let typed_value_fields = Fields::from(vec![Field::new(
2327            "a",
2328            a_field_shredded.data_type().clone(),
2329            true,
2330        )]);
2331        let typed_value_struct = StructArray::try_new(
2332            typed_value_fields,
2333            vec![ArrayRef::from(a_field_shredded)],
2334            None,
2335        )
2336        .unwrap();
2337
2338        // Build final VariantArray
2339        let struct_array = StructArrayBuilder::new()
2340            .with_field("metadata", Arc::new(metadata_array), false)
2341            .with_field("value", Arc::new(value_array), true)
2342            .with_field("typed_value", Arc::new(typed_value_struct), true)
2343            .build();
2344
2345        Arc::new(struct_array)
2346    }
2347
2348    #[test]
2349    fn test_strict_cast_options_downcast_failure() {
2350        use arrow::compute::CastOptions;
2351        use arrow::datatypes::{DataType, Field};
2352        use arrow::error::ArrowError;
2353        use parquet_variant::VariantPath;
2354        use std::sync::Arc;
2355
2356        // Use the existing simple test data that has Int32 as typed_value
2357        let variant_array = perfectly_shredded_int32_variant_array();
2358
2359        // Try to access a field with safe cast options (should return NULLs)
2360        let safe_options = GetOptions {
2361            path: VariantPath::from("nonexistent_field"),
2362            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2363            cast_options: CastOptions::default(), // safe = true
2364        };
2365
2366        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2367        let result = variant_get(&variant_array_ref, safe_options);
2368        // Should succeed and return NULLs (safe behavior)
2369        assert!(result.is_ok());
2370        let result_array = result.unwrap();
2371        assert_eq!(result_array.len(), 3);
2372        assert!(result_array.is_null(0));
2373        assert!(result_array.is_null(1));
2374        assert!(result_array.is_null(2));
2375
2376        // Try to access a field with strict cast options (should error)
2377        let strict_options = GetOptions {
2378            path: VariantPath::from("nonexistent_field"),
2379            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2380            cast_options: CastOptions {
2381                safe: false,
2382                ..Default::default()
2383            },
2384        };
2385
2386        let result = variant_get(&variant_array_ref, strict_options);
2387        // Should fail with a cast error
2388        assert!(result.is_err());
2389        let error = result.unwrap_err();
2390        assert!(matches!(error, ArrowError::CastError(_)));
2391        assert!(
2392            error
2393                .to_string()
2394                .contains("Cannot access field 'nonexistent_field' on non-struct type")
2395        );
2396    }
2397
2398    #[test]
2399    fn test_error_message_boolean_type_display() {
2400        let mut builder = VariantArrayBuilder::new(1);
2401        builder.append_variant(Variant::Int32(123));
2402        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2403
2404        // Request Boolean with strict casting to force an error
2405        let options = GetOptions {
2406            path: VariantPath::default(),
2407            as_type: Some(Arc::new(Field::new("result", DataType::Boolean, true))),
2408            cast_options: CastOptions {
2409                safe: false,
2410                ..Default::default()
2411            },
2412        };
2413
2414        let err = variant_get(&variant_array, options).unwrap_err();
2415        let msg = err.to_string();
2416        assert!(msg.contains("Failed to extract primitive of type Boolean"));
2417    }
2418
2419    #[test]
2420    fn test_error_message_numeric_type_display() {
2421        let mut builder = VariantArrayBuilder::new(1);
2422        builder.append_variant(Variant::BooleanTrue);
2423        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2424
2425        // Request Boolean with strict casting to force an error
2426        let options = GetOptions {
2427            path: VariantPath::default(),
2428            as_type: Some(Arc::new(Field::new("result", DataType::Float32, true))),
2429            cast_options: CastOptions {
2430                safe: false,
2431                ..Default::default()
2432            },
2433        };
2434
2435        let err = variant_get(&variant_array, options).unwrap_err();
2436        let msg = err.to_string();
2437        assert!(msg.contains("Failed to extract primitive of type Float32"));
2438    }
2439
2440    #[test]
2441    fn test_error_message_temporal_type_display() {
2442        let mut builder = VariantArrayBuilder::new(1);
2443        builder.append_variant(Variant::BooleanFalse);
2444        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2445
2446        // Request Boolean with strict casting to force an error
2447        let options = GetOptions {
2448            path: VariantPath::default(),
2449            as_type: Some(Arc::new(Field::new(
2450                "result",
2451                DataType::Timestamp(TimeUnit::Nanosecond, None),
2452                true,
2453            ))),
2454            cast_options: CastOptions {
2455                safe: false,
2456                ..Default::default()
2457            },
2458        };
2459
2460        let err = variant_get(&variant_array, options).unwrap_err();
2461        let msg = err.to_string();
2462        assert!(msg.contains("Failed to extract primitive of type Timestamp(ns)"));
2463    }
2464
2465    #[test]
2466    fn test_null_buffer_union_for_shredded_paths() {
2467        use arrow::compute::CastOptions;
2468        use arrow::datatypes::{DataType, Field};
2469        use parquet_variant::VariantPath;
2470        use std::sync::Arc;
2471
2472        // Test that null buffers are properly unioned when traversing shredded paths
2473        // This test verifies scovich's null buffer union requirement
2474
2475        // Create a depth-1 shredded variant array where:
2476        // - The top-level variant array has some nulls
2477        // - The nested typed_value also has some nulls
2478        // - The result should be the union of both null buffers
2479
2480        let variant_array = create_depth_1_shredded_test_data_working();
2481
2482        // Get the field "x" which should union nulls from:
2483        // 1. The top-level variant array nulls
2484        // 2. The "a" field's typed_value nulls
2485        // 3. The "x" field's typed_value nulls
2486        let options = GetOptions {
2487            path: VariantPath::from("a.x"),
2488            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2489            cast_options: CastOptions::default(),
2490        };
2491
2492        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2493        let result = variant_get(&variant_array_ref, options).unwrap();
2494
2495        // Verify the result length matches input
2496        assert_eq!(result.len(), variant_array.len());
2497
2498        // The null pattern should reflect the union of all ancestor nulls
2499        // Row 0: Should have valid data (path exists and is shredded as Int32)
2500        // Row 1: Should be null (due to type mismatch - "foo" can't cast to Int32)
2501        assert!(!result.is_null(0), "Row 0 should have valid Int32 data");
2502        assert!(
2503            result.is_null(1),
2504            "Row 1 should be null due to type casting failure"
2505        );
2506
2507        // Verify the actual values
2508        let int32_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
2509        assert_eq!(int32_result.value(0), 55); // The valid Int32 value
2510    }
2511
2512    #[test]
2513    fn test_struct_null_mask_union_from_children() {
2514        use arrow::compute::CastOptions;
2515        use arrow::datatypes::{DataType, Field, Fields};
2516        use parquet_variant::VariantPath;
2517        use std::sync::Arc;
2518
2519        use arrow::array::StringArray;
2520
2521        // Test that struct null masks properly union nulls from children field extractions
2522        // This verifies scovich's concern about incomplete null masks in struct construction
2523
2524        // Create test data where some fields will fail type casting
2525        let json_strings = vec![
2526            r#"{"a": 42, "b": "hello"}"#, // Row 0: a=42 (castable to int), b="hello" (not castable to int)
2527            r#"{"a": "world", "b": 100}"#, // Row 1: a="world" (not castable to int), b=100 (castable to int)
2528            r#"{"a": 55, "b": 77}"#,       // Row 2: a=55 (castable to int), b=77 (castable to int)
2529        ];
2530
2531        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
2532        let variant_array = json_to_variant(&string_array).unwrap();
2533
2534        // Request extraction as a struct with both fields as Int32
2535        // This should create child arrays where some fields are null due to casting failures
2536        let struct_fields = Fields::from(vec![
2537            Field::new("a", DataType::Int32, true),
2538            Field::new("b", DataType::Int32, true),
2539        ]);
2540        let struct_type = DataType::Struct(struct_fields);
2541
2542        let options = GetOptions {
2543            path: VariantPath::default(), // Extract the whole object as struct
2544            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
2545            cast_options: CastOptions::default(),
2546        };
2547
2548        let variant_array_ref = ArrayRef::from(variant_array);
2549        let result = variant_get(&variant_array_ref, options).unwrap();
2550
2551        // Verify the result is a StructArray
2552        let struct_result = result.as_struct();
2553        assert_eq!(struct_result.len(), 3);
2554
2555        // Get the individual field arrays
2556        let field_a = struct_result
2557            .column(0)
2558            .as_any()
2559            .downcast_ref::<Int32Array>()
2560            .unwrap();
2561        let field_b = struct_result
2562            .column(1)
2563            .as_any()
2564            .downcast_ref::<Int32Array>()
2565            .unwrap();
2566
2567        // Verify field values and nulls
2568        // Row 0: a=42 (valid), b=null (casting failure)
2569        assert!(!field_a.is_null(0));
2570        assert_eq!(field_a.value(0), 42);
2571        assert!(field_b.is_null(0)); // "hello" can't cast to int
2572
2573        // Row 1: a=null (casting failure), b=100 (valid)
2574        assert!(field_a.is_null(1)); // "world" can't cast to int
2575        assert!(!field_b.is_null(1));
2576        assert_eq!(field_b.value(1), 100);
2577
2578        // Row 2: a=55 (valid), b=77 (valid)
2579        assert!(!field_a.is_null(2));
2580        assert_eq!(field_a.value(2), 55);
2581        assert!(!field_b.is_null(2));
2582        assert_eq!(field_b.value(2), 77);
2583
2584        // Verify the struct-level null mask properly unions child nulls
2585        // The struct should NOT be null in any row because each row has at least one valid field
2586        // (This tests that we're not incorrectly making the entire struct null when children fail)
2587        assert!(!struct_result.is_null(0)); // Has valid field 'a'
2588        assert!(!struct_result.is_null(1)); // Has valid field 'b'
2589        assert!(!struct_result.is_null(2)); // Has both valid fields
2590    }
2591
2592    #[test]
2593    fn test_field_nullability_preservation() {
2594        use arrow::compute::CastOptions;
2595        use arrow::datatypes::{DataType, Field};
2596        use parquet_variant::VariantPath;
2597        use std::sync::Arc;
2598
2599        use arrow::array::StringArray;
2600
2601        // Test that field nullability from GetOptions.as_type is preserved in the result
2602
2603        let json_strings = vec![
2604            r#"{"x": 42}"#,                  // Row 0: Valid int that should convert to Int32
2605            r#"{"x": "not_a_number"}"#,      // Row 1: String that can't cast to Int32
2606            r#"{"x": null}"#,                // Row 2: Explicit null value
2607            r#"{"x": "hello"}"#,             // Row 3: Another string (wrong type)
2608            r#"{"y": 100}"#,                 // Row 4: Missing "x" field (SQL NULL case)
2609            r#"{"x": 127}"#, // Row 5: Small int (could be Int8, widening cast candidate)
2610            r#"{"x": 32767}"#, // Row 6: Medium int (could be Int16, widening cast candidate)
2611            r#"{"x": 2147483647}"#, // Row 7: Max Int32 value (fits in Int32)
2612            r#"{"x": 9223372036854775807}"#, // Row 8: Large Int64 value (cannot convert to Int32)
2613        ];
2614
2615        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
2616        let variant_array = json_to_variant(&string_array).unwrap();
2617
2618        // Test 1: nullable field (should allow nulls from cast failures)
2619        let nullable_field = Arc::new(Field::new("result", DataType::Int32, true));
2620        let options_nullable = GetOptions {
2621            path: VariantPath::from("x"),
2622            as_type: Some(nullable_field.clone()),
2623            cast_options: CastOptions::default(),
2624        };
2625
2626        let variant_array_ref = ArrayRef::from(variant_array);
2627        let result_nullable = variant_get(&variant_array_ref, options_nullable).unwrap();
2628
2629        // Verify we get an Int32Array with nulls for cast failures
2630        let int32_result = result_nullable
2631            .as_any()
2632            .downcast_ref::<Int32Array>()
2633            .unwrap();
2634        assert_eq!(int32_result.len(), 9);
2635
2636        // Row 0: 42 converts successfully to Int32
2637        assert!(!int32_result.is_null(0));
2638        assert_eq!(int32_result.value(0), 42);
2639
2640        // Row 1: "not_a_number" fails to convert -> NULL
2641        assert!(int32_result.is_null(1));
2642
2643        // Row 2: explicit null value -> NULL
2644        assert!(int32_result.is_null(2));
2645
2646        // Row 3: "hello" (wrong type) fails to convert -> NULL
2647        assert!(int32_result.is_null(3));
2648
2649        // Row 4: missing "x" field (SQL NULL case) -> NULL
2650        assert!(int32_result.is_null(4));
2651
2652        // Row 5: 127 (small int, potential Int8 -> Int32 widening)
2653        // Current behavior: JSON parses to Int8, should convert to Int32
2654        assert!(!int32_result.is_null(5));
2655        assert_eq!(int32_result.value(5), 127);
2656
2657        // Row 6: 32767 (medium int, potential Int16 -> Int32 widening)
2658        // Current behavior: JSON parses to Int16, should convert to Int32
2659        assert!(!int32_result.is_null(6));
2660        assert_eq!(int32_result.value(6), 32767);
2661
2662        // Row 7: 2147483647 (max Int32, fits exactly)
2663        // Current behavior: Should convert successfully
2664        assert!(!int32_result.is_null(7));
2665        assert_eq!(int32_result.value(7), 2147483647);
2666
2667        // Row 8: 9223372036854775807 (large Int64, cannot fit in Int32)
2668        // Current behavior: Should fail conversion -> NULL
2669        assert!(int32_result.is_null(8));
2670
2671        // Test 2: non-nullable field (behavior should be the same with safe casting)
2672        let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false));
2673        let options_non_nullable = GetOptions {
2674            path: VariantPath::from("x"),
2675            as_type: Some(non_nullable_field.clone()),
2676            cast_options: CastOptions::default(), // safe=true by default
2677        };
2678
2679        // Create variant array again since we moved it
2680        let variant_array_2 = json_to_variant(&string_array).unwrap();
2681        let variant_array_ref_2 = ArrayRef::from(variant_array_2);
2682        let result_non_nullable = variant_get(&variant_array_ref_2, options_non_nullable).unwrap();
2683        let int32_result_2 = result_non_nullable
2684            .as_any()
2685            .downcast_ref::<Int32Array>()
2686            .unwrap();
2687
2688        // Even with a non-nullable field, safe casting should still produce nulls for failures
2689        assert_eq!(int32_result_2.len(), 9);
2690
2691        // Row 0: 42 converts successfully to Int32
2692        assert!(!int32_result_2.is_null(0));
2693        assert_eq!(int32_result_2.value(0), 42);
2694
2695        // Rows 1-4: All should be null due to safe casting behavior
2696        // (non-nullable field specification doesn't override safe casting behavior)
2697        assert!(int32_result_2.is_null(1)); // "not_a_number"
2698        assert!(int32_result_2.is_null(2)); // explicit null
2699        assert!(int32_result_2.is_null(3)); // "hello"
2700        assert!(int32_result_2.is_null(4)); // missing field
2701
2702        // Rows 5-7: These should also convert successfully (numeric widening/fitting)
2703        assert!(!int32_result_2.is_null(5)); // 127 (Int8 -> Int32)
2704        assert_eq!(int32_result_2.value(5), 127);
2705        assert!(!int32_result_2.is_null(6)); // 32767 (Int16 -> Int32)
2706        assert_eq!(int32_result_2.value(6), 32767);
2707        assert!(!int32_result_2.is_null(7)); // 2147483647 (fits in Int32)
2708        assert_eq!(int32_result_2.value(7), 2147483647);
2709
2710        // Row 8: Large Int64 should fail conversion -> NULL
2711        assert!(int32_result_2.is_null(8)); // 9223372036854775807 (too large for Int32)
2712    }
2713
2714    #[test]
2715    fn test_struct_extraction_subset_superset_schema_perfectly_shredded() {
2716        // Create variant with diverse null patterns and empty objects
2717        let variant_array = create_comprehensive_shredded_variant();
2718
2719        // Request struct with fields "a", "b", "d" (skip existing "c", add missing "d")
2720        let struct_fields = Fields::from(vec![
2721            Field::new("a", DataType::Int32, true),
2722            Field::new("b", DataType::Int32, true),
2723            Field::new("d", DataType::Int32, true),
2724        ]);
2725        let struct_type = DataType::Struct(struct_fields);
2726
2727        let options = GetOptions {
2728            path: VariantPath::default(),
2729            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
2730            cast_options: CastOptions::default(),
2731        };
2732
2733        let result = variant_get(&variant_array, options).unwrap();
2734
2735        // Verify the result is a StructArray with 3 fields and 5 rows
2736        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
2737        assert_eq!(struct_result.len(), 5);
2738        assert_eq!(struct_result.num_columns(), 3);
2739
2740        let field_a = struct_result
2741            .column(0)
2742            .as_any()
2743            .downcast_ref::<Int32Array>()
2744            .unwrap();
2745        let field_b = struct_result
2746            .column(1)
2747            .as_any()
2748            .downcast_ref::<Int32Array>()
2749            .unwrap();
2750        let field_d = struct_result
2751            .column(2)
2752            .as_any()
2753            .downcast_ref::<Int32Array>()
2754            .unwrap();
2755
2756        // Row 0: Normal values {"a": 1, "b": 2, "c": 3} → {a: 1, b: 2, d: NULL}
2757        assert!(!struct_result.is_null(0));
2758        assert_eq!(field_a.value(0), 1);
2759        assert_eq!(field_b.value(0), 2);
2760        assert!(field_d.is_null(0)); // Missing field "d"
2761
2762        // Row 1: Top-level NULL → struct-level NULL
2763        assert!(struct_result.is_null(1));
2764
2765        // Row 2: Field "a" missing → {a: NULL, b: 2, d: NULL}
2766        assert!(!struct_result.is_null(2));
2767        assert!(field_a.is_null(2)); // Missing field "a"
2768        assert_eq!(field_b.value(2), 2);
2769        assert!(field_d.is_null(2)); // Missing field "d"
2770
2771        // Row 3: Field "b" missing → {a: 1, b: NULL, d: NULL}
2772        assert!(!struct_result.is_null(3));
2773        assert_eq!(field_a.value(3), 1);
2774        assert!(field_b.is_null(3)); // Missing field "b"
2775        assert!(field_d.is_null(3)); // Missing field "d"
2776
2777        // Row 4: Empty object {} → {a: NULL, b: NULL, d: NULL}
2778        assert!(!struct_result.is_null(4));
2779        assert!(field_a.is_null(4)); // Empty object
2780        assert!(field_b.is_null(4)); // Empty object
2781        assert!(field_d.is_null(4)); // Missing field "d"
2782    }
2783
2784    #[test]
2785    fn test_nested_struct_extraction_perfectly_shredded() {
2786        // Create nested variant with diverse null patterns
2787        let variant_array = create_comprehensive_nested_shredded_variant();
2788        println!("variant_array: {variant_array:?}");
2789
2790        // Request 3-level nested struct type {"outer": {"inner": INT}}
2791        let inner_field = Field::new("inner", DataType::Int32, true);
2792        let inner_type = DataType::Struct(Fields::from(vec![inner_field]));
2793        let outer_field = Field::new("outer", inner_type, true);
2794        let result_type = DataType::Struct(Fields::from(vec![outer_field]));
2795
2796        let options = GetOptions {
2797            path: VariantPath::default(),
2798            as_type: Some(Arc::new(Field::new("result", result_type, true))),
2799            cast_options: CastOptions::default(),
2800        };
2801
2802        let result = variant_get(&variant_array, options).unwrap();
2803        println!("result: {result:?}");
2804
2805        // Verify the result is a StructArray with "outer" field and 4 rows
2806        let outer_struct = result.as_any().downcast_ref::<StructArray>().unwrap();
2807        assert_eq!(outer_struct.len(), 4);
2808        assert_eq!(outer_struct.num_columns(), 1);
2809
2810        // Get the "inner" struct column
2811        let inner_struct = outer_struct
2812            .column(0)
2813            .as_any()
2814            .downcast_ref::<StructArray>()
2815            .unwrap();
2816        assert_eq!(inner_struct.num_columns(), 1);
2817
2818        // Get the "leaf" field (Int32 values)
2819        let leaf_field = inner_struct
2820            .column(0)
2821            .as_any()
2822            .downcast_ref::<Int32Array>()
2823            .unwrap();
2824
2825        // Row 0: Normal nested {"outer": {"inner": {"leaf": 42}}}
2826        assert!(!outer_struct.is_null(0));
2827        assert!(!inner_struct.is_null(0));
2828        assert_eq!(leaf_field.value(0), 42);
2829
2830        // Row 1: "inner" field missing → {outer: {inner: NULL}}
2831        assert!(!outer_struct.is_null(1));
2832        assert!(!inner_struct.is_null(1)); // outer exists, inner exists but leaf is NULL
2833        assert!(leaf_field.is_null(1)); // leaf field is NULL
2834
2835        // Row 2: "outer" field missing → {outer: NULL}
2836        assert!(!outer_struct.is_null(2));
2837        assert!(inner_struct.is_null(2)); // outer field is NULL
2838
2839        // Row 3: Top-level NULL → struct-level NULL
2840        assert!(outer_struct.is_null(3));
2841    }
2842
2843    #[test]
2844    fn test_path_based_null_masks_one_step() {
2845        // Create nested variant with diverse null patterns
2846        let variant_array = create_comprehensive_nested_shredded_variant();
2847
2848        // Extract "outer" field using path-based variant_get
2849        let path = VariantPath::from("outer");
2850        let inner_field = Field::new("inner", DataType::Int32, true);
2851        let result_type = DataType::Struct(Fields::from(vec![inner_field]));
2852
2853        let options = GetOptions {
2854            path,
2855            as_type: Some(Arc::new(Field::new("result", result_type, true))),
2856            cast_options: CastOptions::default(),
2857        };
2858
2859        let result = variant_get(&variant_array, options).unwrap();
2860
2861        // Verify the result is a StructArray with "inner" field and 4 rows
2862        let outer_result = result.as_any().downcast_ref::<StructArray>().unwrap();
2863        assert_eq!(outer_result.len(), 4);
2864        assert_eq!(outer_result.num_columns(), 1);
2865
2866        // Get the "inner" field (Int32 values)
2867        let inner_field = outer_result
2868            .column(0)
2869            .as_any()
2870            .downcast_ref::<Int32Array>()
2871            .unwrap();
2872
2873        // Row 0: Normal nested {"outer": {"inner": 42}} → {"inner": 42}
2874        assert!(!outer_result.is_null(0));
2875        assert_eq!(inner_field.value(0), 42);
2876
2877        // Row 1: Inner field null {"outer": {"inner": null}} → {"inner": null}
2878        assert!(!outer_result.is_null(1));
2879        assert!(inner_field.is_null(1));
2880
2881        // Row 2: Outer field null {"outer": null} → null (entire struct is null)
2882        assert!(outer_result.is_null(2));
2883
2884        // Row 3: Top-level null → null (entire struct is null)
2885        assert!(outer_result.is_null(3));
2886    }
2887
2888    #[test]
2889    fn test_path_based_null_masks_two_steps() {
2890        // Create nested variant with diverse null patterns
2891        let variant_array = create_comprehensive_nested_shredded_variant();
2892
2893        // Extract "outer.inner" field using path-based variant_get
2894        let path = VariantPath::from("outer").join("inner");
2895
2896        let options = GetOptions {
2897            path,
2898            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2899            cast_options: CastOptions::default(),
2900        };
2901
2902        let result = variant_get(&variant_array, options).unwrap();
2903
2904        // Verify the result is an Int32Array with 4 rows
2905        let int_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
2906        assert_eq!(int_result.len(), 4);
2907
2908        // Row 0: Normal nested {"outer": {"inner": 42}} → 42
2909        assert!(!int_result.is_null(0));
2910        assert_eq!(int_result.value(0), 42);
2911
2912        // Row 1: Inner field null {"outer": {"inner": null}} → null
2913        assert!(int_result.is_null(1));
2914
2915        // Row 2: Outer field null {"outer": null} → null (path traversal fails)
2916        assert!(int_result.is_null(2));
2917
2918        // Row 3: Top-level null → null (path traversal fails)
2919        assert!(int_result.is_null(3));
2920    }
2921
2922    #[test]
2923    fn test_struct_extraction_mixed_and_unshredded() {
2924        // Create a partially shredded variant (x shredded, y not)
2925        let variant_array = create_mixed_and_unshredded_variant();
2926
2927        // Request struct with both shredded and unshredded fields
2928        let struct_fields = Fields::from(vec![
2929            Field::new("x", DataType::Int32, true),
2930            Field::new("y", DataType::Int32, true),
2931        ]);
2932        let struct_type = DataType::Struct(struct_fields);
2933
2934        let options = GetOptions {
2935            path: VariantPath::default(),
2936            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
2937            cast_options: CastOptions::default(),
2938        };
2939
2940        let result = variant_get(&variant_array, options).unwrap();
2941
2942        // Verify the mixed shredding works (should succeed with current implementation)
2943        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
2944        assert_eq!(struct_result.len(), 4);
2945        assert_eq!(struct_result.num_columns(), 2);
2946
2947        let field_x = struct_result
2948            .column(0)
2949            .as_any()
2950            .downcast_ref::<Int32Array>()
2951            .unwrap();
2952        let field_y = struct_result
2953            .column(1)
2954            .as_any()
2955            .downcast_ref::<Int32Array>()
2956            .unwrap();
2957
2958        // Row 0: {"x": 1, "y": 42} - x from shredded, y from value field
2959        assert_eq!(field_x.value(0), 1);
2960        assert_eq!(field_y.value(0), 42);
2961
2962        // Row 1: {"x": 2} - x from shredded, y missing (perfect shredding)
2963        assert_eq!(field_x.value(1), 2);
2964        assert!(field_y.is_null(1));
2965
2966        // Row 2: {"x": 3, "y": null} - x from shredded, y explicitly null in value
2967        assert_eq!(field_x.value(2), 3);
2968        assert!(field_y.is_null(2));
2969
2970        // Row 3: top-level null - entire struct row should be null
2971        assert!(struct_result.is_null(3));
2972    }
2973
2974    /// Test that demonstrates the actual struct row builder gap
2975    /// This test should fail because it hits unshredded nested structs
2976    #[test]
2977    fn test_struct_row_builder_gap_demonstration() {
2978        // Create completely unshredded JSON variant (no typed_value at all)
2979        let json_strings = vec![
2980            r#"{"outer": {"inner": 42}}"#,
2981            r#"{"outer": {"inner": 100}}"#,
2982        ];
2983        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
2984        let variant_array = json_to_variant(&string_array).unwrap();
2985
2986        // Request nested struct - this should fail at the row builder level
2987        let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
2988        let inner_struct_type = DataType::Struct(inner_fields);
2989        let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]);
2990        let outer_struct_type = DataType::Struct(outer_fields);
2991
2992        let options = GetOptions {
2993            path: VariantPath::default(),
2994            as_type: Some(Arc::new(Field::new("result", outer_struct_type, true))),
2995            cast_options: CastOptions::default(),
2996        };
2997
2998        let variant_array_ref = ArrayRef::from(variant_array);
2999        let result = variant_get(&variant_array_ref, options);
3000
3001        // Should fail with NotYetImplemented when the row builder tries to handle struct type
3002        assert!(result.is_err());
3003        let error = result.unwrap_err();
3004        assert!(error.to_string().contains("Not yet implemented"));
3005    }
3006
3007    /// Create comprehensive shredded variant with diverse null patterns and empty objects
3008    /// Rows: normal values, top-level null, missing field a, missing field b, empty object
3009    fn create_comprehensive_shredded_variant() -> ArrayRef {
3010        let (metadata, _) = {
3011            let mut builder = parquet_variant::VariantBuilder::new();
3012            let obj = builder.new_object();
3013            obj.finish();
3014            builder.finish()
3015        };
3016
3017        // Create null buffer for top-level nulls
3018        let nulls = NullBuffer::from(vec![
3019            true,  // row 0: normal values
3020            false, // row 1: top-level null
3021            true,  // row 2: missing field a
3022            true,  // row 3: missing field b
3023            true,  // row 4: empty object
3024        ]);
3025
3026        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 5));
3027
3028        // Create shredded fields with different null patterns
3029        // Field "a": present in rows 0,3 (missing in rows 1,2,4)
3030        let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
3031        let a_field_struct = StructArrayBuilder::new()
3032            .with_field("typed_value", Arc::new(a_field_typed_value), true)
3033            .build();
3034        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct)
3035            .expect("should create ShreddedVariantFieldArray for a");
3036
3037        // Field "b": present in rows 0,2 (missing in rows 1,3,4)
3038        let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
3039        let b_field_struct = StructArrayBuilder::new()
3040            .with_field("typed_value", Arc::new(b_field_typed_value), true)
3041            .build();
3042        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct)
3043            .expect("should create ShreddedVariantFieldArray for b");
3044
3045        // Field "c": present in row 0 only (missing in all other rows)
3046        let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
3047        let c_field_struct = StructArrayBuilder::new()
3048            .with_field("typed_value", Arc::new(c_field_typed_value), true)
3049            .build();
3050        let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct)
3051            .expect("should create ShreddedVariantFieldArray for c");
3052
3053        // Create main typed_value struct
3054        let typed_value_fields = Fields::from(vec![
3055            Field::new("a", a_field_shredded.data_type().clone(), true),
3056            Field::new("b", b_field_shredded.data_type().clone(), true),
3057            Field::new("c", c_field_shredded.data_type().clone(), true),
3058        ]);
3059        let typed_value_struct = StructArray::try_new(
3060            typed_value_fields,
3061            vec![
3062                ArrayRef::from(a_field_shredded),
3063                ArrayRef::from(b_field_shredded),
3064                ArrayRef::from(c_field_shredded),
3065            ],
3066            None,
3067        )
3068        .unwrap();
3069
3070        // Build final VariantArray with top-level nulls
3071        let struct_array = StructArrayBuilder::new()
3072            .with_field("metadata", Arc::new(metadata_array), false)
3073            .with_field("typed_value", Arc::new(typed_value_struct), true)
3074            .with_nulls(nulls)
3075            .build();
3076
3077        Arc::new(struct_array)
3078    }
3079
3080    /// Create comprehensive nested shredded variant with diverse null patterns
3081    /// Represents 3-level structure: variant -> outer -> inner (INT value)
3082    /// The shredding schema is: {"metadata": BINARY, "typed_value": {"outer": {"typed_value": {"inner": {"typed_value": INT}}}}}
3083    /// Rows: normal nested value, inner field null, outer field null, top-level null
3084    fn create_comprehensive_nested_shredded_variant() -> ArrayRef {
3085        // Create the inner level: contains typed_value with Int32 values
3086        // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null
3087        let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); // dummy value for row 2
3088        let inner = StructArrayBuilder::new()
3089            .with_field("typed_value", Arc::new(inner_typed_value), true)
3090            .build();
3091        let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap();
3092
3093        let outer_typed_value_nulls = NullBuffer::from(vec![
3094            true,  // row 0: inner struct exists with typed_value=42
3095            false, // row 1: inner field NULL
3096            false, // row 2: outer field NULL
3097            false, // row 3: top-level NULL
3098        ]);
3099        let outer_typed_value = StructArrayBuilder::new()
3100            .with_field("inner", ArrayRef::from(inner), false)
3101            .with_nulls(outer_typed_value_nulls)
3102            .build();
3103
3104        let outer = StructArrayBuilder::new()
3105            .with_field("typed_value", Arc::new(outer_typed_value), true)
3106            .build();
3107        let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap();
3108
3109        let typed_value_nulls = NullBuffer::from(vec![
3110            true,  // row 0: inner struct exists with typed_value=42
3111            true,  // row 1: inner field NULL
3112            false, // row 2: outer field NULL
3113            false, // row 3: top-level NULL
3114        ]);
3115        let typed_value = StructArrayBuilder::new()
3116            .with_field("outer", ArrayRef::from(outer), false)
3117            .with_nulls(typed_value_nulls)
3118            .build();
3119
3120        // Build final VariantArray with top-level nulls
3121        let metadata_array =
3122            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
3123        let nulls = NullBuffer::from(vec![
3124            true,  // row 0: inner struct exists with typed_value=42
3125            true,  // row 1: inner field NULL
3126            true,  // row 2: outer field NULL
3127            false, // row 3: top-level NULL
3128        ]);
3129        let struct_array = StructArrayBuilder::new()
3130            .with_field("metadata", Arc::new(metadata_array), false)
3131            .with_field("typed_value", Arc::new(typed_value), true)
3132            .with_nulls(nulls)
3133            .build();
3134
3135        Arc::new(struct_array)
3136    }
3137
3138    /// Create variant with mixed shredding (spec-compliant) including null scenarios
3139    /// Field "x" is globally shredded, field "y" is never shredded
3140    fn create_mixed_and_unshredded_variant() -> ArrayRef {
3141        // Create spec-compliant mixed shredding:
3142        // - Field "x" is globally shredded (has typed_value column)
3143        // - Field "y" is never shredded (only appears in value field when present)
3144
3145        let (metadata, y_field_value) = {
3146            let mut builder = parquet_variant::VariantBuilder::new();
3147            let mut obj = builder.new_object();
3148            obj.insert("y", Variant::from(42));
3149            obj.finish();
3150            builder.finish()
3151        };
3152
3153        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
3154
3155        // Value field contains objects with unshredded fields only (never contains "x")
3156        // Row 0: {"y": "foo"} - x is shredded out, y remains in value
3157        // Row 1: {} - both x and y are absent (perfect shredding for x, y missing)
3158        // Row 2: {"y": null} - x is shredded out, y explicitly null
3159        // Row 3: top-level null (encoded in VariantArray's null mask, but fields contain valid data)
3160
3161        let empty_object_value = {
3162            let mut builder = parquet_variant::VariantBuilder::new();
3163            builder.new_object().finish();
3164            let (_, value) = builder.finish();
3165            value
3166        };
3167
3168        let y_null_value = {
3169            let mut builder = parquet_variant::VariantBuilder::new();
3170            builder.new_object().with_field("y", Variant::Null).finish();
3171            let (_, value) = builder.finish();
3172            value
3173        };
3174
3175        let value_array = BinaryViewArray::from(vec![
3176            Some(y_field_value.as_slice()),      // Row 0: {"y": 42}
3177            Some(empty_object_value.as_slice()), // Row 1: {}
3178            Some(y_null_value.as_slice()),       // Row 2: {"y": null}
3179            Some(empty_object_value.as_slice()), // Row 3: top-level null (but value field contains valid data)
3180        ]);
3181
3182        // Create shredded field "x" (globally shredded - never appears in value field)
3183        // For top-level null row, the field still needs valid content (not null)
3184        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]);
3185        let x_field_struct = StructArrayBuilder::new()
3186            .with_field("typed_value", Arc::new(x_field_typed_value), true)
3187            .build();
3188        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
3189            .expect("should create ShreddedVariantFieldArray for x");
3190
3191        // Create main typed_value struct (only contains shredded fields)
3192        let typed_value_struct = StructArrayBuilder::new()
3193            .with_field("x", ArrayRef::from(x_field_shredded), false)
3194            .build();
3195
3196        // Build VariantArray with both value and typed_value (PartiallyShredded)
3197        // Top-level null is encoded in the main StructArray's null mask
3198        let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null
3199        let struct_array = StructArrayBuilder::new()
3200            .with_field("metadata", Arc::new(metadata_array), false)
3201            .with_field("value", Arc::new(value_array), true)
3202            .with_field("typed_value", Arc::new(typed_value_struct), true)
3203            .with_nulls(variant_nulls)
3204            .build();
3205
3206        Arc::new(struct_array)
3207    }
3208
3209    #[test]
3210    fn get_decimal32_rescaled_to_scale2() {
3211        // Build unshredded variant values with different scales
3212        let mut builder = crate::VariantArrayBuilder::new(5);
3213        builder.append_variant(VariantDecimal4::try_new(1234, 2).unwrap().into()); // 12.34
3214        builder.append_variant(VariantDecimal4::try_new(1234, 3).unwrap().into()); // 1.234
3215        builder.append_variant(VariantDecimal4::try_new(1234, 0).unwrap().into()); // 1234
3216        builder.append_null();
3217        builder.append_variant(
3218            VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3)
3219                .unwrap()
3220                .into(),
3221        ); // should fit into Decimal32
3222        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3223
3224        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3225        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3226        let result = variant_get(&variant_array, options).unwrap();
3227        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3228
3229        assert_eq!(result.precision(), 9);
3230        assert_eq!(result.scale(), 2);
3231        assert_eq!(result.value(0), 1234);
3232        assert_eq!(result.value(1), 123);
3233        assert_eq!(result.value(2), 123400);
3234        assert!(result.is_null(3));
3235        assert_eq!(
3236            result.value(4),
3237            VariantDecimal4::MAX_UNSCALED_VALUE / 10 + 1
3238        ); // should not be null as the final result fits into Decimal32
3239    }
3240
3241    #[test]
3242    fn get_decimal32_scale_down_rounding() {
3243        let mut builder = crate::VariantArrayBuilder::new(7);
3244        builder.append_variant(VariantDecimal4::try_new(1235, 0).unwrap().into());
3245        builder.append_variant(VariantDecimal4::try_new(1245, 0).unwrap().into());
3246        builder.append_variant(VariantDecimal4::try_new(-1235, 0).unwrap().into());
3247        builder.append_variant(VariantDecimal4::try_new(-1245, 0).unwrap().into());
3248        builder.append_variant(VariantDecimal4::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3249        builder.append_variant(VariantDecimal4::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3250        builder.append_variant(VariantDecimal4::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3251        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3252
3253        let field = Field::new("result", DataType::Decimal32(9, -1), true);
3254        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3255        let result = variant_get(&variant_array, options).unwrap();
3256        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3257
3258        assert_eq!(result.precision(), 9);
3259        assert_eq!(result.scale(), -1);
3260        assert_eq!(result.value(0), 124);
3261        assert_eq!(result.value(1), 125);
3262        assert_eq!(result.value(2), -124);
3263        assert_eq!(result.value(3), -125);
3264        assert_eq!(result.value(4), 1);
3265        assert!(result.is_valid(5));
3266        assert_eq!(result.value(5), 0);
3267        assert_eq!(result.value(6), 1);
3268    }
3269
3270    #[test]
3271    fn get_decimal32_large_scale_reduction() {
3272        let mut builder = crate::VariantArrayBuilder::new(2);
3273        builder.append_variant(
3274            VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3275                .unwrap()
3276                .into(),
3277        );
3278        builder.append_variant(
3279            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3280                .unwrap()
3281                .into(),
3282        );
3283        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3284
3285        let field = Field::new("result", DataType::Decimal32(9, -9), true);
3286        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3287        let result = variant_get(&variant_array, options).unwrap();
3288        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3289
3290        assert_eq!(result.precision(), 9);
3291        assert_eq!(result.scale(), -9);
3292        assert_eq!(result.value(0), -1);
3293        assert_eq!(result.value(1), 1);
3294
3295        let field = Field::new("result", DataType::Decimal32(9, -10), true);
3296        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3297        let result = variant_get(&variant_array, options).unwrap();
3298        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3299
3300        assert_eq!(result.precision(), 9);
3301        assert_eq!(result.scale(), -10);
3302        assert!(result.is_valid(0));
3303        assert_eq!(result.value(0), 0);
3304        assert!(result.is_valid(1));
3305        assert_eq!(result.value(1), 0);
3306    }
3307
3308    #[test]
3309    fn get_decimal32_precision_overflow_safe() {
3310        // Exceed Decimal32 after scaling and rounding
3311        let mut builder = crate::VariantArrayBuilder::new(2);
3312        builder.append_variant(
3313            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3314                .unwrap()
3315                .into(),
3316        );
3317        builder.append_variant(
3318            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9)
3319                .unwrap()
3320                .into(),
3321        ); // integer value round up overflows
3322        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3323
3324        let field = Field::new("result", DataType::Decimal32(2, 2), true);
3325        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3326        let result = variant_get(&variant_array, options).unwrap();
3327        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3328
3329        assert!(result.is_null(0));
3330        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
3331    }
3332
3333    #[test]
3334    fn get_decimal32_precision_overflow_unsafe_errors() {
3335        let mut builder = crate::VariantArrayBuilder::new(1);
3336        builder.append_variant(
3337            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3338                .unwrap()
3339                .into(),
3340        );
3341        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3342
3343        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3344        let cast_options = CastOptions {
3345            safe: false,
3346            ..Default::default()
3347        };
3348        let options = GetOptions::new()
3349            .with_as_type(Some(FieldRef::from(field)))
3350            .with_cast_options(cast_options);
3351        let err = variant_get(&variant_array, options).unwrap_err();
3352
3353        assert!(
3354            err.to_string().contains(
3355                "Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4"
3356            )
3357        );
3358    }
3359
3360    #[test]
3361    fn get_decimal64_rescaled_to_scale2() {
3362        let mut builder = crate::VariantArrayBuilder::new(5);
3363        builder.append_variant(VariantDecimal8::try_new(1234, 2).unwrap().into()); // 12.34
3364        builder.append_variant(VariantDecimal8::try_new(1234, 3).unwrap().into()); // 1.234
3365        builder.append_variant(VariantDecimal8::try_new(1234, 0).unwrap().into()); // 1234
3366        builder.append_null();
3367        builder.append_variant(
3368            VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3)
3369                .unwrap()
3370                .into(),
3371        ); // should fit into Decimal64
3372        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3373
3374        let field = Field::new("result", DataType::Decimal64(18, 2), true);
3375        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3376        let result = variant_get(&variant_array, options).unwrap();
3377        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3378
3379        assert_eq!(result.precision(), 18);
3380        assert_eq!(result.scale(), 2);
3381        assert_eq!(result.value(0), 1234);
3382        assert_eq!(result.value(1), 123);
3383        assert_eq!(result.value(2), 123400);
3384        assert!(result.is_null(3));
3385        assert_eq!(
3386            result.value(4),
3387            VariantDecimal8::MAX_UNSCALED_VALUE / 10 + 1
3388        ); // should not be null as the final result fits into Decimal64
3389    }
3390
3391    #[test]
3392    fn get_decimal64_scale_down_rounding() {
3393        let mut builder = crate::VariantArrayBuilder::new(7);
3394        builder.append_variant(VariantDecimal8::try_new(1235, 0).unwrap().into());
3395        builder.append_variant(VariantDecimal8::try_new(1245, 0).unwrap().into());
3396        builder.append_variant(VariantDecimal8::try_new(-1235, 0).unwrap().into());
3397        builder.append_variant(VariantDecimal8::try_new(-1245, 0).unwrap().into());
3398        builder.append_variant(VariantDecimal8::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3399        builder.append_variant(VariantDecimal8::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3400        builder.append_variant(VariantDecimal8::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3401        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3402
3403        let field = Field::new("result", DataType::Decimal64(18, -1), true);
3404        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3405        let result = variant_get(&variant_array, options).unwrap();
3406        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3407
3408        assert_eq!(result.precision(), 18);
3409        assert_eq!(result.scale(), -1);
3410        assert_eq!(result.value(0), 124);
3411        assert_eq!(result.value(1), 125);
3412        assert_eq!(result.value(2), -124);
3413        assert_eq!(result.value(3), -125);
3414        assert_eq!(result.value(4), 1);
3415        assert!(result.is_valid(5));
3416        assert_eq!(result.value(5), 0);
3417        assert_eq!(result.value(6), 1);
3418    }
3419
3420    #[test]
3421    fn get_decimal64_large_scale_reduction() {
3422        let mut builder = crate::VariantArrayBuilder::new(2);
3423        builder.append_variant(
3424            VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3425                .unwrap()
3426                .into(),
3427        );
3428        builder.append_variant(
3429            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3430                .unwrap()
3431                .into(),
3432        );
3433        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3434
3435        let field = Field::new("result", DataType::Decimal64(18, -18), true);
3436        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3437        let result = variant_get(&variant_array, options).unwrap();
3438        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3439
3440        assert_eq!(result.precision(), 18);
3441        assert_eq!(result.scale(), -18);
3442        assert_eq!(result.value(0), -1);
3443        assert_eq!(result.value(1), 1);
3444
3445        let field = Field::new("result", DataType::Decimal64(18, -19), true);
3446        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3447        let result = variant_get(&variant_array, options).unwrap();
3448        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3449
3450        assert_eq!(result.precision(), 18);
3451        assert_eq!(result.scale(), -19);
3452        assert!(result.is_valid(0));
3453        assert_eq!(result.value(0), 0);
3454        assert!(result.is_valid(1));
3455        assert_eq!(result.value(1), 0);
3456    }
3457
3458    #[test]
3459    fn get_decimal64_precision_overflow_safe() {
3460        // Exceed Decimal64 after scaling and rounding
3461        let mut builder = crate::VariantArrayBuilder::new(2);
3462        builder.append_variant(
3463            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3464                .unwrap()
3465                .into(),
3466        );
3467        builder.append_variant(
3468            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18)
3469                .unwrap()
3470                .into(),
3471        ); // integer value round up overflows
3472        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3473
3474        let field = Field::new("result", DataType::Decimal64(2, 2), true);
3475        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3476        let result = variant_get(&variant_array, options).unwrap();
3477        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3478
3479        assert!(result.is_null(0));
3480        assert!(result.is_null(1));
3481    }
3482
3483    #[test]
3484    fn get_decimal64_precision_overflow_unsafe_errors() {
3485        let mut builder = crate::VariantArrayBuilder::new(1);
3486        builder.append_variant(
3487            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
3488                .unwrap()
3489                .into(),
3490        );
3491        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3492
3493        let field = Field::new("result", DataType::Decimal64(18, 2), true);
3494        let cast_options = CastOptions {
3495            safe: false,
3496            ..Default::default()
3497        };
3498        let options = GetOptions::new()
3499            .with_as_type(Some(FieldRef::from(field)))
3500            .with_cast_options(cast_options);
3501        let err = variant_get(&variant_array, options).unwrap_err();
3502
3503        assert!(
3504            err.to_string().contains(
3505                "Failed to cast to Decimal64(precision=18, scale=2) from variant Decimal8"
3506            )
3507        );
3508    }
3509
3510    #[test]
3511    fn get_decimal128_rescaled_to_scale2() {
3512        let mut builder = crate::VariantArrayBuilder::new(4);
3513        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into());
3514        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into());
3515        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into());
3516        builder.append_null();
3517        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3518
3519        let field = Field::new("result", DataType::Decimal128(38, 2), true);
3520        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3521        let result = variant_get(&variant_array, options).unwrap();
3522        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3523
3524        assert_eq!(result.precision(), 38);
3525        assert_eq!(result.scale(), 2);
3526        assert_eq!(result.value(0), 1234);
3527        assert_eq!(result.value(1), 123);
3528        assert_eq!(result.value(2), 123400);
3529        assert!(result.is_null(3));
3530    }
3531
3532    #[test]
3533    fn get_decimal128_scale_down_rounding() {
3534        let mut builder = crate::VariantArrayBuilder::new(7);
3535        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
3536        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
3537        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
3538        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
3539        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3540        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3541        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3542        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3543
3544        let field = Field::new("result", DataType::Decimal128(38, -1), true);
3545        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3546        let result = variant_get(&variant_array, options).unwrap();
3547        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3548
3549        assert_eq!(result.precision(), 38);
3550        assert_eq!(result.scale(), -1);
3551        assert_eq!(result.value(0), 124);
3552        assert_eq!(result.value(1), 125);
3553        assert_eq!(result.value(2), -124);
3554        assert_eq!(result.value(3), -125);
3555        assert_eq!(result.value(4), 1);
3556        assert!(result.is_valid(5));
3557        assert_eq!(result.value(5), 0);
3558        assert_eq!(result.value(6), 1);
3559    }
3560
3561    #[test]
3562    fn get_decimal128_precision_overflow_safe() {
3563        // Exceed Decimal128 after scaling and rounding
3564        let mut builder = crate::VariantArrayBuilder::new(2);
3565        builder.append_variant(
3566            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3567                .unwrap()
3568                .into(),
3569        );
3570        builder.append_variant(
3571            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38)
3572                .unwrap()
3573                .into(),
3574        ); // integer value round up overflows
3575        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3576
3577        let field = Field::new("result", DataType::Decimal128(2, 2), true);
3578        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3579        let result = variant_get(&variant_array, options).unwrap();
3580        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
3581
3582        assert!(result.is_null(0));
3583        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
3584    }
3585
3586    #[test]
3587    fn get_decimal128_precision_overflow_unsafe_errors() {
3588        let mut builder = crate::VariantArrayBuilder::new(1);
3589        builder.append_variant(
3590            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3591                .unwrap()
3592                .into(),
3593        );
3594        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3595
3596        let field = Field::new("result", DataType::Decimal128(38, 2), true);
3597        let cast_options = CastOptions {
3598            safe: false,
3599            ..Default::default()
3600        };
3601        let options = GetOptions::new()
3602            .with_as_type(Some(FieldRef::from(field)))
3603            .with_cast_options(cast_options);
3604        let err = variant_get(&variant_array, options).unwrap_err();
3605
3606        assert!(err.to_string().contains(
3607            "Failed to cast to Decimal128(precision=38, scale=2) from variant Decimal16"
3608        ));
3609    }
3610
3611    #[test]
3612    fn get_decimal256_rescaled_to_scale2() {
3613        // Build unshredded variant values with different scales using Decimal16 source
3614        let mut builder = crate::VariantArrayBuilder::new(4);
3615        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34
3616        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234
3617        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234
3618        builder.append_null();
3619        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3620
3621        let field = Field::new("result", DataType::Decimal256(76, 2), true);
3622        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3623        let result = variant_get(&variant_array, options).unwrap();
3624        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3625
3626        assert_eq!(result.precision(), 76);
3627        assert_eq!(result.scale(), 2);
3628        assert_eq!(result.value(0), i256::from_i128(1234));
3629        assert_eq!(result.value(1), i256::from_i128(123));
3630        assert_eq!(result.value(2), i256::from_i128(123400));
3631        assert!(result.is_null(3));
3632    }
3633
3634    #[test]
3635    fn get_decimal256_scale_down_rounding() {
3636        let mut builder = crate::VariantArrayBuilder::new(7);
3637        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
3638        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
3639        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
3640        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
3641        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3642        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3643        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3644        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3645
3646        let field = Field::new("result", DataType::Decimal256(76, -1), true);
3647        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3648        let result = variant_get(&variant_array, options).unwrap();
3649        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3650
3651        assert_eq!(result.precision(), 76);
3652        assert_eq!(result.scale(), -1);
3653        assert_eq!(result.value(0), i256::from_i128(124));
3654        assert_eq!(result.value(1), i256::from_i128(125));
3655        assert_eq!(result.value(2), i256::from_i128(-124));
3656        assert_eq!(result.value(3), i256::from_i128(-125));
3657        assert_eq!(result.value(4), i256::from_i128(1));
3658        assert!(result.is_valid(5));
3659        assert_eq!(result.value(5), i256::from_i128(0));
3660        assert_eq!(result.value(6), i256::from_i128(1));
3661    }
3662
3663    #[test]
3664    fn get_decimal256_precision_overflow_safe() {
3665        // Exceed Decimal128 max precision (38) after scaling
3666        let mut builder = crate::VariantArrayBuilder::new(2);
3667        builder.append_variant(
3668            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
3669                .unwrap()
3670                .into(),
3671        );
3672        builder.append_variant(
3673            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3674                .unwrap()
3675                .into(),
3676        );
3677        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3678
3679        let field = Field::new("result", DataType::Decimal256(76, 39), true);
3680        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3681        let result = variant_get(&variant_array, options).unwrap();
3682        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
3683
3684        // Input is Decimal16 with integer = 10^38-1 and scale = 1, target scale = 39
3685        // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38
3686        let base = i256::from_i128(10);
3687        let factor = base.checked_pow(38).unwrap();
3688        let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE)
3689            .checked_mul(factor)
3690            .unwrap();
3691        assert_eq!(result.value(0), expected);
3692        assert!(result.is_null(1));
3693    }
3694
3695    #[test]
3696    fn get_decimal256_precision_overflow_unsafe_errors() {
3697        // Exceed Decimal128 max precision (38) after scaling
3698        let mut builder = crate::VariantArrayBuilder::new(2);
3699        builder.append_variant(
3700            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
3701                .unwrap()
3702                .into(),
3703        );
3704        builder.append_variant(
3705            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
3706                .unwrap()
3707                .into(),
3708        );
3709        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3710
3711        let field = Field::new("result", DataType::Decimal256(76, 39), true);
3712        let cast_options = CastOptions {
3713            safe: false,
3714            ..Default::default()
3715        };
3716        let options = GetOptions::new()
3717            .with_as_type(Some(FieldRef::from(field)))
3718            .with_cast_options(cast_options);
3719        let err = variant_get(&variant_array, options).unwrap_err();
3720
3721        assert!(err.to_string().contains(
3722            "Failed to cast to Decimal256(precision=76, scale=39) from variant Decimal16"
3723        ));
3724    }
3725
3726    #[test]
3727    fn get_non_supported_temporal_types_error() {
3728        let values = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse)];
3729        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(values));
3730
3731        let test_cases = vec![
3732            FieldRef::from(Field::new(
3733                "result",
3734                DataType::Duration(TimeUnit::Microsecond),
3735                true,
3736            )),
3737            FieldRef::from(Field::new(
3738                "result",
3739                DataType::Interval(IntervalUnit::YearMonth),
3740                true,
3741            )),
3742        ];
3743
3744        for field in test_cases {
3745            let options = GetOptions::new().with_as_type(Some(field));
3746            let err = variant_get(&variant_array, options).unwrap_err();
3747            assert!(
3748                err.to_string()
3749                    .contains("Casting Variant to duration/interval types is not supported")
3750            );
3751        }
3752    }
3753
    // Defines `perfectly_shredded_invalid_time_variant_array()`, the fixture used by the two
    // cast-failure tests below. The closure supplies three identical Time64 (microsecond) values.
    // NOTE(review): how the macro wraps these values (presumably as the `typed_value` column of
    // a shredded variant array) is defined outside this chunk -- confirm against the macro.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_invalid_time_variant_array, || {
        // 86401000000 microseconds is one second more than 24 hours, so it is not a valid
        // microseconds-from-midnight value for Time64Microsecond.
        Time64MicrosecondArray::from(vec![
            Some(86401000000),
            Some(86401000000),
            Some(86401000000),
        ])
    });
3762
3763    #[test]
3764    fn test_variant_get_error_when_cast_failure_and_safe_false() {
3765        let variant_array = perfectly_shredded_invalid_time_variant_array();
3766
3767        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
3768        let cast_options = CastOptions {
3769            safe: false, // Will error on cast failure
3770            ..Default::default()
3771        };
3772        let options = GetOptions::new()
3773            .with_as_type(Some(FieldRef::from(field)))
3774            .with_cast_options(cast_options);
3775        let err = variant_get(&variant_array, options).unwrap_err();
3776        assert!(
3777            err.to_string().contains(
3778                "Cast error: Cast failed at index 0 (array type: Time64(µs)): Invalid microsecond from midnight: 86401000000"
3779            )
3780        );
3781    }
3782
3783    #[test]
3784    fn test_variant_get_return_null_when_cast_failure_and_safe_true() {
3785        let variant_array = perfectly_shredded_invalid_time_variant_array();
3786
3787        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
3788        let cast_options = CastOptions {
3789            safe: true, // Will return null on cast failure
3790            ..Default::default()
3791        };
3792        let options = GetOptions::new()
3793            .with_as_type(Some(FieldRef::from(field)))
3794            .with_cast_options(cast_options);
3795        let result = variant_get(&variant_array, options).unwrap();
3796        assert_eq!(3, result.len());
3797
3798        for i in 0..3 {
3799            assert!(result.is_null(i));
3800        }
3801    }
3802}