Skip to main content

parquet_variant_compute/
variant_get.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use arrow::{
18    array::{
19        self, Array, ArrayRef, GenericListArray, GenericListViewArray, ListLikeArray, StructArray,
20        UInt64Array, make_array,
21    },
22    buffer::NullBuffer,
23    compute::{CastOptions, take},
24    datatypes::Field,
25    error::Result,
26};
27use arrow_schema::{ArrowError, DataType, FieldRef};
28use parquet_variant::{VariantPath, VariantPathElement};
29
30use crate::ShreddingState;
31use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
32use crate::{VariantArray, VariantType, unshred_variant};
33
34use arrow::array::AsArray;
35use std::sync::Arc;
36
37pub(crate) enum ShreddedPathStep {
38    /// Path step succeeded, return the new shredding state
39    Success(ShreddingState),
40    /// The path element is not present in the `typed_value` column and there is no `value` column,
41    /// so we know it does not exist. It, and all paths under it, are all-NULL.
42    Missing,
43    /// The path element is not present in the `typed_value` column and must be retrieved from the `value`
44    /// column instead. The caller should be prepared to handle any value, including the requested
45    /// type, an arbitrary "wrong" type, or `Variant::Null`.
46    NotShredded,
47}
48
49/// Build the next shredding state by taking one list-like element (at `index`) per input row.
50///
51fn take_list_like_index_as_shredding_state<L: ListLikeArray + 'static>(
52    typed_value: &dyn Array,
53    index: usize,
54) -> Result<Option<ShreddingState>> {
55    let list_array = typed_value.as_any().downcast_ref::<L>().ok_or_else(|| {
56        ArrowError::ComputeError(format!(
57            "Expected array type '{}' while handling list-like path step, got '{}'",
58            std::any::type_name::<L>(),
59            typed_value.data_type()
60        ))
61    })?;
62
63    let values = list_array.values();
64
65    let Some(struct_array) = values.as_struct_opt() else {
66        return Ok(None);
67    };
68    let shredding_state = ShreddingState::try_from(struct_array)?;
69
70    let value_array = shredding_state.value_column();
71    let typed_array = shredding_state.typed_value_column();
72
73    // If list elements have neither typed nor fallback value, this path step is missing.
74    if value_array.is_none() && typed_array.is_none() {
75        return Ok(None);
76    }
77
78    let mut take_indices = Vec::with_capacity(list_array.len());
79    for row in 0..list_array.len() {
80        let row_range = list_array.element_range(row);
81        let take_index = (index < row_range.len()).then(|| (row_range.start + index) as u64);
82        take_indices.push(take_index);
83    }
84
85    let index_array = UInt64Array::from(take_indices);
86
87    // Gather both typed and fallback values at the requested element index.
88    let taken_value = value_array
89        .map(|value| take(value, &index_array, None))
90        .transpose()?;
91    let taken_typed = typed_array
92        .map(|typed| take(typed, &index_array, None))
93        .transpose()?;
94
95    Ok(Some(ShreddingState::new(taken_value, taken_typed)))
96}
97
98/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
99/// deeper. For a `VariantPathElement::Field`, if there is no `typed_value` at this level, if
100/// `typed_value` is not a struct, or if the requested field name does not exist, traversal returns
101/// a missing-path step (`Missing` or `NotShredded` depending on whether `value` exists).
102///
103/// Safe-cast behavior (`cast_options.safe = true`):
104/// - Type mismatch during path traversal (for example field access on non-struct, index access on
105///   non-list) returns [`ShreddedPathStep::Missing`] or [`ShreddedPathStep::NotShredded`], allowing
106///   the caller to continue with null/fallback semantics.
107/// - List index out-of-bounds produces nulls for the corresponding rows.
108///
109/// Unsafe-cast behavior (`cast_options.safe = false`):
110/// - Field access on non-struct returns [`ArrowError::CastError`].
111/// - List index path steps follow JSONPath semantics and return missing/null for non-list or
112///   out-of-bounds rows.
113pub(crate) fn follow_shredded_path_element(
114    shredding_state: &ShreddingState,
115    path_element: &VariantPathElement<'_>,
116    _cast_options: &CastOptions,
117) -> Result<ShreddedPathStep> {
118    // If the requested path element is not present in `typed_value`, and `value` is missing, then
119    // we know it does not exist; it, and all paths under it, are all-NULL.
120    let missing_path_step = || match shredding_state.value_column() {
121        Some(_) => ShreddedPathStep::NotShredded,
122        None => ShreddedPathStep::Missing,
123    };
124
125    let Some(typed_value) = shredding_state.typed_value_column() else {
126        return Ok(missing_path_step());
127    };
128
129    match path_element {
130        VariantPathElement::Field { name } => {
131            // Try to step into the requested field name of a struct.
132            // First, try to downcast to StructArray
133            let Some(struct_array) = typed_value.as_struct_opt() else {
134                // Object field path step follows JSONPath semantics and returns missing path step (NotShredded/Missing) on non-struct path
135                return Ok(missing_path_step());
136            };
137
138            // Now try to find the column - missing column in a present struct is just missing data
139            let Some(field) = struct_array.column_by_name(name) else {
140                // Missing column in a present struct is just missing, not wrong - return Ok
141                return Ok(missing_path_step());
142            };
143
144            let struct_array = field.as_struct_opt().ok_or_else(|| {
145                // TODO: Should we blow up? Or just end the traversal and let the normal
146                // variant pathing code sort out the mess that it must anyway be
147                // prepared to handle?
148                ArrowError::InvalidArgumentError(format!(
149                    "Expected Struct array while following path, got {}",
150                    field.data_type(),
151                ))
152            })?;
153
154            let state = ShreddingState::try_from(struct_array)?;
155            Ok(ShreddedPathStep::Success(state))
156        }
157        VariantPathElement::Index { index } => {
158            let state = match typed_value.data_type() {
159                DataType::List(_) => take_list_like_index_as_shredding_state::<
160                    GenericListArray<i32>,
161                >(typed_value.as_ref(), *index)?,
162                DataType::LargeList(_) => take_list_like_index_as_shredding_state::<
163                    GenericListArray<i64>,
164                >(typed_value.as_ref(), *index)?,
165                DataType::ListView(_) => take_list_like_index_as_shredding_state::<
166                    GenericListViewArray<i32>,
167                >(typed_value.as_ref(), *index)?,
168                DataType::LargeListView(_) => take_list_like_index_as_shredding_state::<
169                    GenericListViewArray<i64>,
170                >(typed_value.as_ref(), *index)?,
171                _ => {
172                    // JSONPath semantics: indexing a non-list yields no match.
173                    return Ok(missing_path_step());
174                }
175            };
176
177            match state {
178                Some(state) => Ok(ShreddedPathStep::Success(state)),
179                None => Ok(missing_path_step()),
180            }
181        }
182    }
183}
184
185/// Follows the given path as far as possible through shredded variant fields. If the path ends on a
186/// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path
187/// and extract the requested value on a per-row basis.
188fn shredded_get_path(
189    input: &VariantArray,
190    path: &[VariantPathElement<'_>],
191    as_field: Option<&Field>,
192    cast_options: &CastOptions,
193) -> Result<ArrayRef> {
194    // Helper that creates a new VariantArray from the given nested value and typed_value columns,
195    // properly accounting for accumulated nulls from path traversal
196    let make_target_variant =
197        |value: Option<ArrayRef>,
198         typed_value: Option<ArrayRef>,
199         accumulated_nulls: Option<NullBuffer>| {
200            let metadata = input.metadata_column().clone();
201            VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls)
202        };
203
204    // Helper that extracts the value at `path` and casts it to the requested type, or returns it as
205    // an unshredded binary variant when `Variant` output is requested.
206    let shred_basic_variant =
207        |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
208            // A `VariantType` extension on `as_field` requests `Variant` output: return an
209            // unshredded binary variant instead of casting to a concrete Arrow type.
210            let requested_variant =
211                as_field.is_some_and(Field::has_valid_extension_type::<VariantType>);
212
213            // A `typed_value` in that field requests shredded output -- a `VariantArray` with
214            // `typed_value` columns. We produce only unshredded variant output. Shredded output is
215            // tracked in https://github.com/apache/arrow-rs/issues/8153. Reject such a request
216            // instead of silently dropping the shredding it asked for.
217            if requested_variant && requested_field_is_shredded(as_field) {
218                return Err(ArrowError::NotYetImplemented(
219                    "variant_get with shredded `Variant` output is not yet supported".to_string(),
220                ));
221            }
222
223            // Collapse any shredding back to binary. Only the `NotShredded` step below passes a
224            // non-empty `path`, and there `target` is already a plain `value` column (no
225            // `typed_value`) -- so `unshred_variant` hits its clone fast-path, with nothing deeper
226            // to shred. The builder then walks any remaining path per-row, emitting variant output
227            // because `as_type` is `None`.
228            let target = if requested_variant {
229                unshred_variant(&target)?
230            } else {
231                target
232            };
233
234            // Path exhausted, variant requested: return the target directly.
235            if requested_variant && path.is_empty() {
236                return Ok(ArrayRef::from(target));
237            }
238
239            let as_type = if requested_variant {
240                None
241            } else {
242                as_field.map(|f| f.data_type())
243            };
244            let mut builder = make_variant_to_arrow_row_builder(
245                target.metadata_column(),
246                path,
247                as_type,
248                cast_options,
249                target.len(),
250            )?;
251            for i in 0..target.len() {
252                if target.is_null(i) {
253                    builder.append_null()?;
254                } else if !cast_options.safe {
255                    let value = target.try_value(i)?;
256                    builder.append_value(value)?;
257                } else {
258                    let _ = match target.try_value(i) {
259                        Ok(v) => builder.append_value(v)?,
260                        Err(_) => {
261                            builder.append_null()?;
262                            false // add this to make match arms have the same return type
263                        }
264                    };
265                }
266            }
267            builder.finish()
268        };
269
270    // Peel away the prefix of path elements that traverses the shredded parts of this variant
271    // column. Shredding will traverse the rest of the path on a per-row basis.
272    let mut shredding_state = input.shredding_state().clone();
273    let mut accumulated_nulls = input.inner().nulls().cloned();
274    let mut path_index = 0;
275    for path_element in path {
276        match follow_shredded_path_element(&shredding_state, path_element, cast_options)? {
277            ShreddedPathStep::Success(state) => {
278                // Union nulls from the typed_value we just accessed
279                if let Some(typed_value) = shredding_state.typed_value_column() {
280                    accumulated_nulls =
281                        NullBuffer::union(accumulated_nulls.as_ref(), typed_value.nulls());
282                }
283                shredding_state = state;
284                path_index += 1;
285                continue;
286            }
287            ShreddedPathStep::Missing => {
288                let num_rows = input.len();
289                if as_field.is_some_and(Field::has_valid_extension_type::<VariantType>) {
290                    let all_nulls = Some(arrow::buffer::NullBuffer::from(vec![false; num_rows]));
291                    // Propagating metadata is not necessary for an all-NULL array, but is cheaper than constructing
292                    // a new empty metadata array. (n * 3 bytes vs Arc bump)
293                    let metadata = input.metadata_column().clone();
294                    let arr = VariantArray::from_parts(metadata, None, None, all_nulls);
295                    return Ok(ArrayRef::from(arr));
296                }
297                let arr = match as_field.map(|f| f.data_type()) {
298                    Some(data_type) => array::new_null_array(data_type, num_rows),
299                    None => Arc::new(array::NullArray::new(num_rows)) as _,
300                };
301                return Ok(arr);
302            }
303            ShreddedPathStep::NotShredded => {
304                let target = make_target_variant(
305                    shredding_state.value_column().cloned(),
306                    None,
307                    accumulated_nulls,
308                );
309                return shred_basic_variant(target, path[path_index..].into(), as_field);
310            }
311        };
312    }
313
314    // Path exhausted! Create a new `VariantArray` for the location we landed on.
315    let target = make_target_variant(
316        shredding_state.value_column().cloned(),
317        shredding_state.typed_value_column().cloned(),
318        accumulated_nulls,
319    );
320
321    // If our caller did not request any specific type, we can just return whatever we landed on.
322    let Some(as_field) = as_field else {
323        return Ok(ArrayRef::from(target));
324    };
325
326    // Try to return the typed value directly when we have a perfect shredding match.
327    if let Some(shredded) = try_perfect_shredding(&target, as_field) {
328        return Ok(shredded);
329    }
330
331    // Structs are special.
332    //
333    // For fully unshredded targets (`typed_value` absent), delegate to the row builder so we
334    // preserve struct-level cast semantics:
335    // - safe mode: non-object rows become NULL structs
336    // - strict mode: non-object rows raise a cast error
337    //
338    // For shredded/partially-shredded targets (`typed_value` present), recurse into each field
339    // separately to take advantage of deeper shredding in child fields.
340    if !as_field.has_valid_extension_type::<VariantType>() {
341        if let DataType::Struct(fields) = as_field.data_type() {
342            if target.typed_value_column().is_none() {
343                return shred_basic_variant(target, VariantPath::default(), Some(as_field));
344            }
345
346            let children = fields
347                .iter()
348                .map(|field| {
349                    let path = &[VariantPathElement::from(field.name().as_str())];
350                    shredded_get_path(&target, path, Some(field), cast_options)
351                })
352                .collect::<Result<Vec<_>>>()?;
353
354            return Ok(Arc::new(StructArray::try_new(
355                fields.clone(),
356                children,
357                target.nulls().cloned(),
358            )?));
359        }
360    }
361
362    // Not a struct, so directly shred the variant as the requested type
363    shred_basic_variant(target, VariantPath::default(), Some(as_field))
364}
365
366/// Returns true if `as_field` requests *shredded* `Variant` output.
367///
368/// Its struct carries a `typed_value` field naming the type to shred to.
369/// A plain variant request has only `metadata` and `value`.
370fn requested_field_is_shredded(as_field: Option<&Field>) -> bool {
371    as_field.is_some_and(|f| match f.data_type() {
372        DataType::Struct(fields) => fields.iter().any(|field| field.name() == "typed_value"),
373        _ => false,
374    })
375}
376
377fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Option<ArrayRef> {
378    // Try to return the typed value directly when we have a perfect shredding match.
379    if matches!(as_field.data_type(), DataType::Struct(_)) {
380        return None;
381    }
382    let typed_value = variant_array.typed_value_column()?;
383
384    if typed_value.data_type() == as_field.data_type()
385        && variant_array
386            .value_column()
387            .is_none_or(|v| v.null_count() == v.len())
388    {
389        // Here we need to gate against the case where the `typed_value` is null but data is in the `value` column.
390        // 1. If the `value` column is null, or
391        // 2. If every row in the `value` column is null
392
393        // This is a perfect shredding, where the value is entirely shredded out,
394        // so we can just return the typed value after merging the accumulated nulls.
395        let parent_nulls = variant_array.nulls();
396
397        // If we have no nulls OR the shredded array is `Null`, which doesn't support external nulls.
398        let target_array = if parent_nulls.is_none() || typed_value.data_type().is_null() {
399            typed_value.clone()
400        } else {
401            let merged_nulls = NullBuffer::union(parent_nulls, typed_value.nulls());
402            let data = typed_value
403                .to_data()
404                .into_builder()
405                .nulls(merged_nulls)
406                .build()
407                .ok()?;
408            make_array(data)
409        };
410
411        return Some(target_array);
412    }
413
414    None
415}
416
417/// Returns an array with the specified path extracted from the variant values.
418///
419/// The return array type depends on the `as_type` field of the options parameter
420/// 1. `as_type: None`: a VariantArray is returned. The values in this new VariantArray will point
421///    to the specified path.
422/// 2. `as_type: Some(<specific field>)`: an array of the specified type is returned.
423///
424/// TODO: How would a caller request a struct or list type where the fields/elements can be any
425/// variant? Caller can pass None as the requested type to fetch a specific path, but it would
426/// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or
427/// list and then try to assemble the results.
428pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
429    let variant_array = VariantArray::try_new(input)?;
430
431    let GetOptions {
432        as_type,
433        path,
434        cast_options,
435    } = options;
436
437    shredded_get_path(&variant_array, &path, as_type.as_deref(), &cast_options)
438}
439
440/// Controls the action of the variant_get kernel.
441#[derive(Debug, Clone, Default)]
442pub struct GetOptions<'a> {
443    /// What path to extract
444    pub path: VariantPath<'a>,
445    /// if `as_type` is None, the returned array will itself be a VariantArray.
446    ///
447    /// if `as_type` is `Some(type)` the field is returned as the specified type.
448    pub as_type: Option<FieldRef>,
449    /// Controls the casting behavior (e.g. error vs substituting null on cast error).
450    pub cast_options: CastOptions<'a>,
451}
452
453impl<'a> GetOptions<'a> {
454    /// Construct default options to get the specified path as a variant.
455    pub fn new() -> Self {
456        Default::default()
457    }
458
459    /// Construct options to get the specified path as a variant.
460    pub fn new_with_path(path: VariantPath<'a>) -> Self {
461        Self {
462            path,
463            as_type: None,
464            cast_options: Default::default(),
465        }
466    }
467
468    /// Specify the type to return.
469    pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
470        self.as_type = as_type;
471        self
472    }
473
474    /// Specify the cast options to use when casting to the specified type.
475    pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
476        self.cast_options = cast_options;
477        self
478    }
479}
480
481#[cfg(test)]
482mod test {
483    use std::str::FromStr;
484    use std::sync::Arc;
485
486    use super::{GetOptions, requested_field_is_shredded, variant_get};
487    use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
488    use crate::{
489        ShreddedSchemaBuilder, VariantArray, VariantArrayBuilder, cast_to_variant, json_to_variant,
490        shred_variant,
491    };
492    use arrow::array::{
493        Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
494        Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
495        FixedSizeListArray, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array,
496        Int64Array, LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray,
497        ListArray, ListViewArray, NullArray, NullBuilder, StringArray, StringViewArray,
498        StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
499        Time64NanosecondArray,
500    };
501    use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
502    use arrow::compute::{CastOptions, cast};
503    use arrow::datatypes::DataType::{Int16, Int32, Int64};
504    use arrow::datatypes::i256;
505    use arrow::util::display::FormatOptions;
506    use arrow_schema::ArrowError;
507    use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
508    use arrow_schema::{DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit};
509    use chrono::DateTime;
510    use parquet_variant::{
511        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
512        VariantDecimalType, VariantPath,
513    };
514
515    fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
516        // Create input array from JSON string
517        let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
518        let input_variant_array_ref = ArrayRef::from(json_to_variant(&input_array_ref).unwrap());
519
520        let result =
521            variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
522
523        // Create expected array from JSON string
524        let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
525        let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();
526
527        let result_array = VariantArray::try_new(&result).unwrap();
528        assert_eq!(
529            result_array.len(),
530            1,
531            "Expected result array to have length 1"
532        );
533        assert!(
534            result_array.nulls().is_none(),
535            "Expected no nulls in result array"
536        );
537        let result_variant = result_array.value(0);
538        let expected_variant = expected_variant_array.value(0);
539        assert_eq!(
540            result_variant, expected_variant,
541            "Result variant does not match expected variant"
542        );
543    }
544
545    #[test]
546    fn get_primitive_variant_field() {
547        single_variant_get_test(
548            r#"{"some_field": 1234}"#,
549            VariantPath::try_from("some_field").unwrap(),
550            "1234",
551        );
552    }
553
554    #[test]
555    fn get_primitive_variant_list_index() {
556        single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
557    }
558
559    #[test]
560    fn get_primitive_variant_inside_object_of_object() {
561        single_variant_get_test(
562            r#"{"top_level_field": {"inner_field": 1234}}"#,
563            VariantPath::try_from("top_level_field")
564                .unwrap()
565                .join("inner_field"),
566            "1234",
567        );
568    }
569
570    #[test]
571    fn get_primitive_variant_inside_list_of_object() {
572        single_variant_get_test(
573            r#"[{"some_field": 1234}]"#,
574            VariantPath::from(0).join("some_field"),
575            "1234",
576        );
577    }
578
579    #[test]
580    fn get_primitive_variant_inside_object_of_list() {
581        single_variant_get_test(
582            r#"{"some_field": [1234]}"#,
583            VariantPath::try_from("some_field[0]").unwrap(),
584            "1234",
585        );
586    }
587
588    #[test]
589    fn get_complex_variant() {
590        single_variant_get_test(
591            r#"{"top_level_field": {"inner_field": 1234}}"#,
592            VariantPath::try_from("top_level_field").unwrap(),
593            r#"{"inner_field": 1234}"#,
594        );
595    }
596
597    /// Partial Shredding: extract a value as a VariantArray
598    macro_rules! numeric_partially_shredded_test {
599        ($primitive_type:ty, $data_fn:ident) => {
600            let array = $data_fn();
601            let options = GetOptions::new();
602            let result = variant_get(&array, options).unwrap();
603
604            // expect the result is a VariantArray
605            let result = VariantArray::try_new(&result).unwrap();
606            assert_eq!(result.len(), 4);
607
608            // Expect the values are the same as the original values
609            assert_eq!(
610                result.value(0),
611                Variant::from(<$primitive_type>::try_from(34u8).unwrap())
612            );
613            assert!(!result.is_valid(1));
614            assert_eq!(result.value(2), Variant::from("n/a"));
615            assert_eq!(
616                result.value(3),
617                Variant::from(<$primitive_type>::try_from(100u8).unwrap())
618            );
619        };
620    }
621
622    /// Build a mixed input [typed, null, fallback, typed] and let shred_variant
623    /// generate the shredded fixture for the requested type.
624    macro_rules! partially_shredded_variant_array_gen {
625        ($func_name:ident,  $typed_value_array_gen: expr) => {
626            partially_shredded_variant_array_gen!(
627                $func_name,
628                $typed_value_array_gen,
629                Variant::from("n/a")
630            );
631        };
632        ($func_name:ident,  $typed_value_array_gen: expr, $fallback_variant:expr) => {
633            fn $func_name() -> ArrayRef {
634                let typed_value: ArrayRef = Arc::new($typed_value_array_gen());
635                let typed_as_variant = cast_to_variant(typed_value.as_ref())
636                    .expect("should cast typed array to variant");
637                let mut input_builder = VariantArrayBuilder::new(typed_as_variant.len());
638                input_builder.append_variant(typed_as_variant.value(0));
639                input_builder.append_null();
640                input_builder.append_variant($fallback_variant);
641                input_builder.append_variant(typed_as_variant.value(3));
642
643                let variant_array = shred_variant(&input_builder.build(), typed_value.data_type())
644                    .expect("should shred variant array");
645                ArrayRef::from(variant_array)
646            }
647        };
648    }
649
650    // Fixture definitions grouped with the partially-shredded tests.
651    macro_rules! numeric_partially_shredded_variant_array_fn {
652        ($func:ident, $array_type:ident, $primitive_type:ty) => {
653            partially_shredded_variant_array_gen!($func, || $array_type::from(vec![
654                Some(<$primitive_type>::try_from(34u8).unwrap()),
655                None,
656                None,
657                Some(<$primitive_type>::try_from(100u8).unwrap()),
658            ]));
659        };
660    }
661
662    numeric_partially_shredded_variant_array_fn!(
663        partially_shredded_int8_variant_array,
664        Int8Array,
665        i8
666    );
667    numeric_partially_shredded_variant_array_fn!(
668        partially_shredded_int16_variant_array,
669        Int16Array,
670        i16
671    );
672    numeric_partially_shredded_variant_array_fn!(
673        partially_shredded_int32_variant_array,
674        Int32Array,
675        i32
676    );
677    numeric_partially_shredded_variant_array_fn!(
678        partially_shredded_int64_variant_array,
679        Int64Array,
680        i64
681    );
682    numeric_partially_shredded_variant_array_fn!(
683        partially_shredded_float32_variant_array,
684        Float32Array,
685        f32
686    );
687    numeric_partially_shredded_variant_array_fn!(
688        partially_shredded_float64_variant_array,
689        Float64Array,
690        f64
691    );
692
693    partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || {
694        arrow::array::BooleanArray::from(vec![Some(true), None, None, Some(false)])
695    });
696
697    partially_shredded_variant_array_gen!(
698        partially_shredded_utf8_variant_array,
699        || { StringArray::from(vec![Some("hello"), None, None, Some("world")]) },
700        Variant::from(42i32)
701    );
702
703    partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || {
704        Date32Array::from(vec![
705            Some(20348), // 2025-09-17
706            None,
707            None,
708            Some(20340), // 2025-09-09
709        ])
710    });
711
712    #[test]
713    fn get_variant_partially_shredded_int8_as_variant() {
714        numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
715    }
716
717    #[test]
718    fn get_variant_partially_shredded_int16_as_variant() {
719        numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
720    }
721
722    #[test]
723    fn get_variant_partially_shredded_int32_as_variant() {
724        numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
725    }
726
727    #[test]
728    fn get_variant_partially_shredded_int64_as_variant() {
729        numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
730    }
731
732    #[test]
733    fn get_variant_partially_shredded_float32_as_variant() {
734        numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
735    }
736
737    #[test]
738    fn get_variant_partially_shredded_float64_as_variant() {
739        numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
740    }
741
742    #[test]
743    fn get_variant_partially_shredded_bool_as_variant() {
744        let array = partially_shredded_bool_variant_array();
745        let options = GetOptions::new();
746        let result = variant_get(&array, options).unwrap();
747
748        // expect the result is a VariantArray
749        let result = VariantArray::try_new(&result).unwrap();
750        assert_eq!(result.len(), 4);
751
752        // Expect the values are the same as the original values
753        assert_eq!(result.value(0), Variant::from(true));
754        assert!(!result.is_valid(1));
755        assert_eq!(result.value(2), Variant::from("n/a"));
756        assert_eq!(result.value(3), Variant::from(false));
757    }
758
759    #[test]
760    fn get_variant_partially_shredded_utf8_as_variant() {
761        let array = partially_shredded_utf8_variant_array();
762        let options = GetOptions::new();
763        let result = variant_get(&array, options).unwrap();
764
765        // expect the result is a VariantArray
766        let result = VariantArray::try_new(&result).unwrap();
767        assert_eq!(result.len(), 4);
768
769        // Expect the values are the same as the original values
770        assert_eq!(result.value(0), Variant::from("hello"));
771        assert!(!result.is_valid(1));
772        assert_eq!(result.value(2), Variant::from(42i32));
773        assert_eq!(result.value(3), Variant::from("world"));
774    }
775
776    partially_shredded_variant_array_gen!(partially_shredded_binary_view_variant_array, || {
777        BinaryViewArray::from(vec![
778            Some(&[1u8, 2u8, 3u8][..]), // row 0 is shredded
779            None,                       // row 1 is null
780            None,                       // row 2 is a string
781            Some(&[4u8, 5u8, 6u8][..]), // row 3 is shredded
782        ])
783    });
784
785    #[test]
786    fn get_variant_partially_shredded_date32_as_variant() {
787        let array = partially_shredded_date32_variant_array();
788        let options = GetOptions::new();
789        let result = variant_get(&array, options).unwrap();
790
791        // expect the result is a VariantArray
792        let result = VariantArray::try_new(&result).unwrap();
793        assert_eq!(result.len(), 4);
794
795        // Expect the values are the same as the original values
796        use chrono::NaiveDate;
797        let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap();
798        let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap();
799        assert_eq!(result.value(0), Variant::from(date1));
800        assert!(!result.is_valid(1));
801        assert_eq!(result.value(2), Variant::from("n/a"));
802        assert_eq!(result.value(3), Variant::from(date2));
803    }
804
805    #[test]
806    fn get_variant_partially_shredded_binary_view_as_variant() {
807        let array = partially_shredded_binary_view_variant_array();
808        let options = GetOptions::new();
809        let result = variant_get(&array, options).unwrap();
810
811        // expect the result is a VariantArray
812        let result = VariantArray::try_new(&result).unwrap();
813        assert_eq!(result.len(), 4);
814
815        // Expect the values are the same as the original values
816        assert_eq!(result.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
817        assert!(!result.is_valid(1));
818        assert_eq!(result.value(2), Variant::from("n/a"));
819        assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
820    }
821
822    // Timestamp partially-shredded tests grouped with the other partially-shredded cases.
823    macro_rules! assert_variant_get_as_variant_array_with_default_option {
824        ($variant_array: expr, $array_expected: expr) => {{
825            let options = GetOptions::new();
826            let array = $variant_array;
827            let result = variant_get(&array, options).unwrap();
828            let result = VariantArray::try_new(&result).unwrap();
829
830            assert_eq!(result.len(), $array_expected.len());
831
832            for (idx, item) in $array_expected.into_iter().enumerate() {
833                match item {
834                    Some(item) => assert_eq!(result.value(idx), item),
835                    None => assert!(result.is_null(idx)),
836                }
837            }
838        }};
839    }
840
841    partially_shredded_variant_array_gen!(
842        partially_shredded_timestamp_micro_ntz_variant_array,
843        || {
844            arrow::array::TimestampMicrosecondArray::from(vec![
845                Some(-456000),
846                None,
847                None,
848                Some(1758602096000000),
849            ])
850        }
851    );
852
853    #[test]
854    fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
855        let array = partially_shredded_timestamp_micro_ntz_variant_array();
856        assert_variant_get_as_variant_array_with_default_option!(
857            array,
858            vec![
859                Some(Variant::from(
860                    DateTime::from_timestamp_micros(-456000i64)
861                        .unwrap()
862                        .naive_utc(),
863                )),
864                None,
865                Some(Variant::from("n/a")),
866                Some(Variant::from(
867                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
868                        .unwrap()
869                        .naive_utc(),
870                )),
871            ]
872        )
873    }
874
875    partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
876        arrow::array::TimestampMicrosecondArray::from(vec![
877            Some(-456000),
878            None,
879            None,
880            Some(1758602096000000),
881        ])
882        .with_timezone("+00:00")
883    });
884
885    #[test]
886    fn get_variant_partial_shredded_timestamp_micro_as_variant() {
887        let array = partially_shredded_timestamp_micro_variant_array();
888        assert_variant_get_as_variant_array_with_default_option!(
889            array,
890            vec![
891                Some(Variant::from(
892                    DateTime::from_timestamp_micros(-456000i64)
893                        .unwrap()
894                        .to_utc(),
895                )),
896                None,
897                Some(Variant::from("n/a")),
898                Some(Variant::from(
899                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
900                        .unwrap()
901                        .to_utc(),
902                )),
903            ]
904        )
905    }
906
907    partially_shredded_variant_array_gen!(
908        partially_shredded_timestamp_nano_ntz_variant_array,
909        || {
910            arrow::array::TimestampNanosecondArray::from(vec![
911                Some(-4999999561),
912                None,
913                None,
914                Some(1758602096000000000),
915            ])
916        }
917    );
918
919    #[test]
920    fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
921        let array = partially_shredded_timestamp_nano_ntz_variant_array();
922        assert_variant_get_as_variant_array_with_default_option!(
923            array,
924            vec![
925                Some(Variant::from(
926                    DateTime::from_timestamp(-5, 439).unwrap().naive_utc()
927                )),
928                None,
929                Some(Variant::from("n/a")),
930                Some(Variant::from(
931                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
932                        .unwrap()
933                        .naive_utc()
934                )),
935            ]
936        )
937    }
938
939    partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
940        arrow::array::TimestampNanosecondArray::from(vec![
941            Some(-4999999561),
942            None,
943            None,
944            Some(1758602096000000000),
945        ])
946        .with_timezone("+00:00")
947    });
948
949    #[test]
950    fn get_variant_partial_shredded_timestamp_nano_as_variant() {
951        let array = partially_shredded_timestamp_nano_variant_array();
952        assert_variant_get_as_variant_array_with_default_option!(
953            array,
954            vec![
955                Some(Variant::from(
956                    DateTime::from_timestamp(-5, 439).unwrap().to_utc()
957                )),
958                None,
959                Some(Variant::from("n/a")),
960                Some(Variant::from(
961                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
962                        .unwrap()
963                        .to_utc()
964                )),
965            ]
966        )
967    }
968
969    /// Shredding: extract a value as an Int32Array
970    #[test]
971    fn get_variant_shredded_int32_as_int32_safe_cast() {
972        // Extract the typed value as Int32Array
973        let array = partially_shredded_int32_variant_array();
974        // specify we want the typed value as Int32
975        let field = Field::new("typed_value", DataType::Int32, true);
976        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
977        let result = variant_get(&array, options).unwrap();
978        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
979            Some(34),
980            None,
981            None, // "n/a" is not an Int32 so converted to null
982            Some(100),
983        ]));
984        assert_eq!(&result, &expected)
985    }
986
987    /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a")
988    #[test]
989    fn get_variant_shredded_int32_as_int32_unsafe_cast() {
990        // Extract the typed value as Int32Array
991        let array = partially_shredded_int32_variant_array();
992        let field = Field::new("typed_value", DataType::Int32, true);
993        let cast_options = CastOptions {
994            safe: false, // unsafe cast
995            ..Default::default()
996        };
997        let options = GetOptions::new()
998            .with_as_type(Some(FieldRef::from(field)))
999            .with_cast_options(cast_options);
1000
1001        let err = variant_get(&array, options).unwrap_err();
1002        // TODO make this error message nicer (not Debug format)
1003        assert_eq!(
1004            err.to_string(),
1005            "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
1006        );
1007    }
1008
1009    /// Perfect Shredding: extract the typed value as a VariantArray
1010    macro_rules! numeric_perfectly_shredded_test {
1011        ($primitive_type:ty, $data_fn:ident) => {
1012            let array = $data_fn();
1013            let options = GetOptions::new();
1014            let result = variant_get(&array, options).unwrap();
1015
1016            // expect the result is a VariantArray
1017            let result = VariantArray::try_new(&result).unwrap();
1018            assert_eq!(result.len(), 3);
1019
1020            // Expect the values are the same as the original values
1021            assert_eq!(
1022                result.value(0),
1023                Variant::from(<$primitive_type>::try_from(1u8).unwrap())
1024            );
1025            assert_eq!(
1026                result.value(1),
1027                Variant::from(<$primitive_type>::try_from(2u8).unwrap())
1028            );
1029            assert_eq!(
1030                result.value(2),
1031                Variant::from(<$primitive_type>::try_from(3u8).unwrap())
1032            );
1033        };
1034    }
1035
1036    #[test]
1037    fn get_variant_perfectly_shredded_int8_as_variant() {
1038        numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
1039    }
1040
1041    #[test]
1042    fn get_variant_perfectly_shredded_int16_as_variant() {
1043        numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
1044    }
1045
1046    #[test]
1047    fn get_variant_perfectly_shredded_int32_as_variant() {
1048        numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
1049    }
1050
1051    #[test]
1052    fn get_variant_perfectly_shredded_int64_as_variant() {
1053        numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
1054    }
1055
1056    #[test]
1057    fn get_variant_perfectly_shredded_float32_as_variant() {
1058        numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
1059    }
1060
1061    #[test]
1062    fn get_variant_perfectly_shredded_float64_as_variant() {
1063        numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
1064    }
1065
1066    /// AllNull: extract a value as a VariantArray
1067    #[test]
1068    fn get_variant_all_null_as_variant() {
1069        let array = all_null_variant_array();
1070        let options = GetOptions::new();
1071        let result = variant_get(&array, options).unwrap();
1072
1073        // expect the result is a VariantArray
1074        let result = VariantArray::try_new(&result).unwrap();
1075        assert_eq!(result.len(), 3);
1076
1077        // All values should be null
1078        assert!(!result.is_valid(0));
1079        assert!(!result.is_valid(1));
1080        assert!(!result.is_valid(2));
1081    }
1082
1083    /// AllNull: extract a value as an Int32Array
1084    #[test]
1085    fn get_variant_all_null_as_int32() {
1086        let array = all_null_variant_array();
1087        // specify we want the typed value as Int32
1088        let field = Field::new("typed_value", DataType::Int32, true);
1089        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
1090        let result = variant_get(&array, options).unwrap();
1091
1092        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
1093            Option::<i32>::None,
1094            Option::<i32>::None,
1095            Option::<i32>::None,
1096        ]));
1097        assert_eq!(&result, &expected)
1098    }
1099
1100    macro_rules! perfectly_shredded_to_arrow_primitive_test {
1101        ($name:ident, $primitive_type:expr, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
1102            #[test]
1103            fn $name() {
1104                let array = $perfectly_shredded_array_gen_fun();
1105                let field = Field::new("typed_value", $primitive_type, true);
1106                let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
1107                let result = variant_get(&array, options).unwrap();
1108                let expected_array: ArrayRef = Arc::new($expected_array);
1109                assert_eq!(&result, &expected_array);
1110            }
1111        };
1112    }
1113
1114    perfectly_shredded_to_arrow_primitive_test!(
1115        get_variant_perfectly_shredded_int18_as_int8,
1116        Int8,
1117        perfectly_shredded_int8_variant_array,
1118        Int8Array::from(vec![Some(1), Some(2), Some(3)])
1119    );
1120
1121    perfectly_shredded_to_arrow_primitive_test!(
1122        get_variant_perfectly_shredded_int16_as_int16,
1123        Int16,
1124        perfectly_shredded_int16_variant_array,
1125        Int16Array::from(vec![Some(1), Some(2), Some(3)])
1126    );
1127
1128    perfectly_shredded_to_arrow_primitive_test!(
1129        get_variant_perfectly_shredded_int32_as_int32,
1130        Int32,
1131        perfectly_shredded_int32_variant_array,
1132        Int32Array::from(vec![Some(1), Some(2), Some(3)])
1133    );
1134
1135    perfectly_shredded_to_arrow_primitive_test!(
1136        get_variant_perfectly_shredded_int64_as_int64,
1137        Int64,
1138        perfectly_shredded_int64_variant_array,
1139        Int64Array::from(vec![Some(1), Some(2), Some(3)])
1140    );
1141
1142    perfectly_shredded_to_arrow_primitive_test!(
1143        get_variant_perfectly_shredded_float32_as_float32,
1144        Float32,
1145        perfectly_shredded_float32_variant_array,
1146        Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
1147    );
1148
1149    perfectly_shredded_to_arrow_primitive_test!(
1150        get_variant_perfectly_shredded_float64_as_float64,
1151        Float64,
1152        perfectly_shredded_float64_variant_array,
1153        Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
1154    );
1155
1156    perfectly_shredded_to_arrow_primitive_test!(
1157        get_variant_perfectly_shredded_boolean_as_boolean,
1158        Boolean,
1159        perfectly_shredded_bool_variant_array,
1160        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
1161    );
1162
1163    perfectly_shredded_to_arrow_primitive_test!(
1164        get_variant_perfectly_shredded_utf8_as_utf8,
1165        DataType::Utf8,
1166        perfectly_shredded_utf8_variant_array,
1167        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1168    );
1169
1170    perfectly_shredded_to_arrow_primitive_test!(
1171        get_variant_perfectly_shredded_large_utf8_as_utf8,
1172        DataType::Utf8,
1173        perfectly_shredded_large_utf8_variant_array,
1174        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1175    );
1176
1177    perfectly_shredded_to_arrow_primitive_test!(
1178        get_variant_perfectly_shredded_utf8_view_as_utf8,
1179        DataType::Utf8,
1180        perfectly_shredded_utf8_view_variant_array,
1181        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1182    );
1183
1184    macro_rules! perfectly_shredded_variant_array_fn {
1185        ($func:ident, $typed_value_gen:expr) => {
1186            fn $func() -> ArrayRef {
1187                // Prefer producing fixtures with shred_variant from unshredded input.
1188                // Fall back for remaining non-shreddable test-only Arrow types (currently Null).
1189                let typed_value: ArrayRef = Arc::new($typed_value_gen());
1190                if let Some(shredded) = cast_to_variant(typed_value.as_ref())
1191                    .ok()
1192                    .and_then(|unshredded| shred_variant(&unshredded, typed_value.data_type()).ok())
1193                {
1194                    return shredded.into();
1195                }
1196
1197                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
1198                    EMPTY_VARIANT_METADATA_BYTES,
1199                    typed_value.len(),
1200                ));
1201                VariantArray::from_parts(Arc::new(metadata), None, Some(typed_value), None).into()
1202            }
1203        };
1204    }
1205
1206    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_variant_array, || {
1207        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1208    });
1209
1210    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_utf8_variant_array, || {
1211        LargeStringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1212    });
1213
1214    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_view_variant_array, || {
1215        StringViewArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
1216    });
1217
1218    perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
1219        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
1220    });
1221
1222    /// Return a VariantArray that represents a perfectly "shredded" variant
1223    /// for the given typed value.
1224    ///
1225    /// The schema of the corresponding `StructArray` would look like this:
1226    ///
1227    /// ```text
1228    /// StructArray {
1229    ///   metadata: BinaryViewArray,
1230    ///   typed_value: Int32Array,
1231    /// }
1232    /// ```
1233    macro_rules! numeric_perfectly_shredded_variant_array_fn {
1234        ($func:ident, $array_type:ident, $primitive_type:ty) => {
1235            perfectly_shredded_variant_array_fn!($func, || {
1236                $array_type::from(vec![
1237                    Some(<$primitive_type>::try_from(1u8).unwrap()),
1238                    Some(<$primitive_type>::try_from(2u8).unwrap()),
1239                    Some(<$primitive_type>::try_from(3u8).unwrap()),
1240                ])
1241            });
1242        };
1243    }
1244
1245    numeric_perfectly_shredded_variant_array_fn!(
1246        perfectly_shredded_int8_variant_array,
1247        Int8Array,
1248        i8
1249    );
1250    numeric_perfectly_shredded_variant_array_fn!(
1251        perfectly_shredded_int16_variant_array,
1252        Int16Array,
1253        i16
1254    );
1255    numeric_perfectly_shredded_variant_array_fn!(
1256        perfectly_shredded_int32_variant_array,
1257        Int32Array,
1258        i32
1259    );
1260    numeric_perfectly_shredded_variant_array_fn!(
1261        perfectly_shredded_int64_variant_array,
1262        Int64Array,
1263        i64
1264    );
1265    numeric_perfectly_shredded_variant_array_fn!(
1266        perfectly_shredded_float32_variant_array,
1267        Float32Array,
1268        f32
1269    );
1270    numeric_perfectly_shredded_variant_array_fn!(
1271        perfectly_shredded_float64_variant_array,
1272        Float64Array,
1273        f64
1274    );
1275
1276    perfectly_shredded_variant_array_fn!(
1277        perfectly_shredded_timestamp_micro_ntz_variant_array,
1278        || {
1279            arrow::array::TimestampMicrosecondArray::from(vec![
1280                Some(-456000),
1281                Some(1758602096000001),
1282                Some(1758602096000002),
1283            ])
1284        }
1285    );
1286
1287    perfectly_shredded_to_arrow_primitive_test!(
1288        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_micro_ntz,
1289        DataType::Timestamp(TimeUnit::Microsecond, None),
1290        perfectly_shredded_timestamp_micro_ntz_variant_array,
1291        arrow::array::TimestampMicrosecondArray::from(vec![
1292            Some(-456000),
1293            Some(1758602096000001),
1294            Some(1758602096000002),
1295        ])
1296    );
1297
1298    // test converting micro to nano
1299    perfectly_shredded_to_arrow_primitive_test!(
1300        get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
1301        DataType::Timestamp(TimeUnit::Nanosecond, None),
1302        perfectly_shredded_timestamp_micro_ntz_variant_array,
1303        arrow::array::TimestampNanosecondArray::from(vec![
1304            Some(-456000000),
1305            Some(1758602096000001000),
1306            Some(1758602096000002000)
1307        ])
1308    );
1309
1310    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
1311        arrow::array::TimestampMicrosecondArray::from(vec![
1312            Some(-456000),
1313            Some(1758602096000001),
1314            Some(1758602096000002),
1315        ])
1316        .with_timezone("+00:00")
1317    });
1318
1319    perfectly_shredded_to_arrow_primitive_test!(
1320        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_micro,
1321        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+00:00"))),
1322        perfectly_shredded_timestamp_micro_variant_array,
1323        arrow::array::TimestampMicrosecondArray::from(vec![
1324            Some(-456000),
1325            Some(1758602096000001),
1326            Some(1758602096000002),
1327        ])
1328        .with_timezone("+00:00")
1329    );
1330
1331    // test converting micro to nano
1332    perfectly_shredded_to_arrow_primitive_test!(
1333        get_variant_perfectly_shredded_timestamp_micro_as_nano,
1334        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
1335        perfectly_shredded_timestamp_micro_variant_array,
1336        arrow::array::TimestampNanosecondArray::from(vec![
1337            Some(-456000000),
1338            Some(1758602096000001000),
1339            Some(1758602096000002000)
1340        ])
1341        .with_timezone("+00:00")
1342    );
1343
1344    perfectly_shredded_variant_array_fn!(
1345        perfectly_shredded_timestamp_nano_ntz_variant_array,
1346        || {
1347            arrow::array::TimestampNanosecondArray::from(vec![
1348                Some(-4999999561),
1349                Some(1758602096000000001),
1350                Some(1758602096000000002),
1351            ])
1352        }
1353    );
1354
1355    perfectly_shredded_variant_array_fn!(
1356        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
1357        || {
1358            arrow::array::TimestampMicrosecondArray::from(vec![
1359                Some(1234),       // can't be cast to second & millisecond
1360                Some(1234000),    // can be cast to millisecond, but not second
1361                Some(1234000000), // can be cast to second & millisecond
1362            ])
1363            .with_timezone("+00:00")
1364        }
1365    );
1366
    // The following two tests cover the micro (with timezone) -> milli/second cases.
    // The fixture has three rows; some of them can be cast safely and some can't.
1369    perfectly_shredded_to_arrow_primitive_test!(
1370        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_second,
1371        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
1372        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
1373        arrow::array::TimestampSecondArray::from(vec![
1374            None,
1375            None, // Return None if can't be cast to second safely
1376            Some(1234)
1377        ])
1378        .with_timezone("+00:00")
1379    );
1380
1381    perfectly_shredded_to_arrow_primitive_test!(
1382        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_milli,
1383        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
1384        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
1385        arrow::array::TimestampMillisecondArray::from(vec![
1386            None, // Return None if can't be cast to millisecond safely
1387            Some(1234),
1388            Some(1234000)
1389        ])
1390        .with_timezone("+00:00")
1391    );
1392
1393    perfectly_shredded_variant_array_fn!(
1394        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1395        || {
1396            arrow::array::TimestampMicrosecondArray::from(vec![
1397                Some(1234),       // can't be cast to second & millisecond
1398                Some(1234000),    // can be cast to millisecond, but not second
1399                Some(1234000000), // can be cast to second & millisecond
1400            ])
1401        }
1402    );
1403
    // The following two tests cover the micro_ntz -> milli/second cases.
    // The fixture has three rows; some of them can be cast safely and some can't.
1406    perfectly_shredded_to_arrow_primitive_test!(
1407        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_second,
1408        DataType::Timestamp(TimeUnit::Second, None),
1409        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1410        arrow::array::TimestampSecondArray::from(vec![
1411            None,
1412            None, // Return None if can't be cast to second safely
1413            Some(1234)
1414        ])
1415    );
1416
1417    perfectly_shredded_to_arrow_primitive_test!(
1418        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_milli,
1419        DataType::Timestamp(TimeUnit::Millisecond, None),
1420        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
1421        arrow::array::TimestampMillisecondArray::from(vec![
1422            None, // Return None if can't be cast to millisecond safely
1423            Some(1234),
1424            Some(1234000)
1425        ])
1426    );
1427
1428    perfectly_shredded_variant_array_fn!(
1429        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1430        || {
1431            arrow::array::TimestampNanosecondArray::from(vec![
1432                Some(1234000),       // can't be cast to second & millisecond
1433                Some(1234000000),    // can be cast to millisecond, but not second
1434                Some(1234000000000), // can be cast to second & millisecond
1435            ])
1436            .with_timezone("+00:00")
1437        }
1438    );
1439
    // The following two tests cover the nano (with timezone) -> milli/second cases.
    // The fixture has three rows; some of them can be cast safely and some can't.
1442    perfectly_shredded_to_arrow_primitive_test!(
1443        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_second,
1444        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
1445        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1446        arrow::array::TimestampSecondArray::from(vec![
1447            None,
1448            None, // Return None if can't be cast to second safely
1449            Some(1234)
1450        ])
1451        .with_timezone("+00:00")
1452    );
1453
1454    perfectly_shredded_to_arrow_primitive_test!(
1455        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_milli,
1456        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
1457        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
1458        arrow::array::TimestampMillisecondArray::from(vec![
1459            None, // Return None if can't be cast to millisecond safely
1460            Some(1234),
1461            Some(1234000)
1462        ])
1463        .with_timezone("+00:00")
1464    );
1465
1466    perfectly_shredded_variant_array_fn!(
1467        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1468        || {
1469            arrow::array::TimestampNanosecondArray::from(vec![
1470                Some(1234000),       // can't be cast to second & millisecond
1471                Some(1234000000),    // can be cast to millisecond, but not second
1472                Some(1234000000000), // can be cast to second & millisecond
1473            ])
1474        }
1475    );
1476
    // The following two tests cover the nano_ntz -> milli/second cases.
    // The fixture has three rows; some of them can be cast safely and some can't.
1479    perfectly_shredded_to_arrow_primitive_test!(
1480        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_second,
1481        DataType::Timestamp(TimeUnit::Second, None),
1482        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1483        arrow::array::TimestampSecondArray::from(vec![
1484            None,
1485            None, // Return None if can't be cast to second safely
1486            Some(1234)
1487        ])
1488    );
1489
1490    perfectly_shredded_to_arrow_primitive_test!(
1491        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_milli,
1492        DataType::Timestamp(TimeUnit::Millisecond, None),
1493        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
1494        arrow::array::TimestampMillisecondArray::from(vec![
1495            None, // Return None if can't be cast to millisecond safely
1496            Some(1234),
1497            Some(1234000)
1498        ])
1499    );
1500
    // Reads a time-zone-less nanosecond fixture back as the same Arrow type.
    // NOTE(review): the fixture `perfectly_shredded_timestamp_nano_ntz_variant_array` is
    // defined elsewhere in this module; the expected values presumably mirror it unchanged
    // — confirm against its definition.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
        DataType::Timestamp(TimeUnit::Nanosecond, None),
        perfectly_shredded_timestamp_nano_ntz_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
    );
1511
    // Fixture: nanosecond timestamps tagged with the "+00:00" time zone, including
    // one negative value.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_nano_variant_array, || {
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    });

    // Requesting the same unit and time zone as the fixture: the expected array is
    // identical to the fixture's typed values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_nano,
        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
        perfectly_shredded_timestamp_nano_variant_array,
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            Some(1758602096000000001),
            Some(1758602096000000002),
        ])
        .with_timezone("+00:00")
    );
1532
    // Fixture: three Date32 values, one of them negative.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_date_variant_array, || {
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    });

    // Date32 -> Date32: the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_date_as_date,
        DataType::Date32,
        perfectly_shredded_date_variant_array,
        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
    );

    // Date32 -> Date64: each expected value is the fixture's day count multiplied by
    // 86_400_000 (milliseconds per day), e.g. 20000 * 86_400_000 = 1_728_000_000_000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_date_as_date64,
        DataType::Date64,
        perfectly_shredded_date_variant_array,
        Date64Array::from(vec![
            Some(-1066608000000),
            Some(1519430400000),
            Some(1728000000000)
        ])
    );
1554
    // Fixture: three Time64(Microsecond) values.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    });

    // Time64(Microsecond) -> Time64(Microsecond): the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time,
        DataType::Time64(TimeUnit::Microsecond),
        perfectly_shredded_time_variant_array,
        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
    );

    // Time64(Microsecond) -> Time64(Nanosecond): each expected value is the fixture
    // value multiplied by 1000.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time64_nano,
        DataType::Time64(TimeUnit::Nanosecond),
        perfectly_shredded_time_variant_array,
        Time64NanosecondArray::from(vec![
            Some(12345000000),
            Some(87654000000),
            Some(135792000000)
        ])
    );
1576
    // Fixture: Time64(Microsecond) values chosen so that each one is exactly
    // representable at a different coarsest Time32 unit (none, milliseconds, seconds).
    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array_for_time32, || {
        Time64MicrosecondArray::from(vec![
            Some(1234),        // This can't be cast to Time32 losslessly
            Some(7654000),     // This can be cast to Time32(Millisecond), but not Time32(Second)
            Some(35792000000), // This can be cast to Time32(Second) & Time32(Millisecond)
        ])
    });

    // Only the last value (35_792_000_000 us) is a whole number of seconds.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time32_second,
        DataType::Time32(TimeUnit::Second),
        perfectly_shredded_time_variant_array_for_time32,
        Time32SecondArray::from(vec![
            None,
            None, // Return None if can't be cast to Time32(Second) safely
            Some(35792)
        ])
    );

    // Only the first value (1_234 us) is not a whole number of milliseconds.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_time_as_time32_milli,
        DataType::Time32(TimeUnit::Millisecond),
        perfectly_shredded_time_variant_array_for_time32,
        Time32MillisecondArray::from(vec![
            None, // Return None if can't be cast to Time32(Millisecond) safely
            Some(7654),
            Some(35792000)
        ])
    );
1606
    // Fixture: a `NullArray` of length 3 produced through `NullBuilder`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
        let mut builder = NullBuilder::new();
        builder.append_nulls(3);
        builder.finish()
    });

    // Null -> Null: the expected result is a three-row `NullArray`.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_as_null,
        DataType::Null,
        perfectly_shredded_null_variant_array,
        arrow::array::NullArray::new(3)
    );

    // Fixture: Int32 values, used below to request `DataType::Null` from a column
    // whose values are not null.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array_with_int, || {
        Int32Array::from(vec![Some(32), Some(64), Some(48)])
    });

    // We append null values if a type mismatch happens in safe mode
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_null_with_type_missmatch_in_safe_mode,
        DataType::Null,
        perfectly_shredded_null_variant_array_with_int,
        arrow::array::NullArray::new(3)
    );
1631
1632    // We'll return an error if type miss match happens in strict mode
1633    #[test]
1634    fn get_variant_perfectly_shredded_null_as_null_with_type_missmatch_in_strict_mode() {
1635        let array = perfectly_shredded_null_variant_array_with_int();
1636        let field = Field::new("typed_value", DataType::Null, true);
1637        let options = GetOptions::new()
1638            .with_as_type(Some(FieldRef::from(field)))
1639            .with_cast_options(CastOptions {
1640                safe: false,
1641                format_options: FormatOptions::default(),
1642            });
1643
1644        let result = variant_get(&array, options);
1645
1646        assert!(result.is_err());
1647        let error_msg = format!("{}", result.unwrap_err());
1648        assert!(
1649            error_msg
1650                .contains("Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])"),
1651            "Expected=[Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])],\
1652                Got error message=[{}]",
1653            error_msg
1654        );
1655    }
1656
    // Fixture: Decimal32 values with precision 5 and scale 2, one of them negative.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal4_variant_array, || {
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    });

    // Decimal32(5, 2) -> Decimal32(5, 2): the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal4_as_decimal4,
        DataType::Decimal32(5, 2),
        perfectly_shredded_decimal4_variant_array,
        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
            .with_precision_and_scale(5, 2)
            .unwrap()
    );
1671
    // Fixture: Decimal64 values with precision 6 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        || {
            Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
                .with_precision_and_scale(6, 1)
                .unwrap()
        }
    );

    // The input is cast to Decimal32 when it is transformed to a Variant.
    // This test covers the path DataType::Decimal64 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal64 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_through_decimal32_as_decimal8,
        DataType::Decimal64(6, 1),
        perfectly_shredded_decimal8_variant_array_cast2decimal32,
        Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
            .with_precision_and_scale(6, 1)
            .unwrap()
    );

    // This test covers the path DataType::Decimal64 (the original array)
    //  -> Variant::Decimal8 (VariantArray) -> DataType::Decimal64 (the result array)
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal8_variant_array, || {
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    });

    // Decimal64(10, 1) -> Decimal64(10, 1): the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal8_as_decimal8,
        DataType::Decimal64(10, 1),
        perfectly_shredded_decimal8_variant_array,
        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );
1709
    // Fixture for the path DataType::Decimal128 (the original array)
    //  -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array).
    // The values use precision 7 and scale 3.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        || {
            Decimal128Array::from(vec![
                Some(i128::from(1234589)),
                Some(i128::from(2344444)),
                Some(i128::from(-1234789)),
            ])
            .with_precision_and_scale(7, 3)
            .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within_decimal4_as_decimal16,
        DataType::Decimal128(7, 3),
        perfectly_shredded_decimal16_within_decimal4_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from(1234589)),
            Some(i128::from(2344444)),
            Some(i128::from(-1234789)),
        ])
        .with_precision_and_scale(7, 3)
        .unwrap()
    );

    // Fixture: Decimal128 values with precision 10 and scale 1.
    perfectly_shredded_variant_array_fn!(
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        || {
            Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
                .with_precision_and_scale(10, 1)
                .unwrap()
        }
    );

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal8 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_within8_as_decimal16,
        DataType::Decimal128(10, 1),
        perfectly_shredded_decimal16_within_decimal8_variant_array,
        Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
            .with_precision_and_scale(10, 1)
            .unwrap()
    );

    // Fixture: Decimal128 values with precision 20 and scale 3. The 20-digit
    // magnitudes are parsed from strings with `i128::from_str`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal16_variant_array, || {
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap()),
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    });

    // This test covers the path DataType::Decimal128 (the original array)
    // -> Variant::Decimal16 (VariantArray) -> DataType::Decimal128 (the result array)
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_decimal16_as_decimal16,
        DataType::Decimal128(20, 3),
        perfectly_shredded_decimal16_variant_array,
        Decimal128Array::from(vec![
            Some(i128::from_str("12345678901234567899").unwrap()),
            Some(i128::from_str("23445677483748324300").unwrap()),
            Some(i128::from_str("-12345678901234567899").unwrap())
        ])
        .with_precision_and_scale(20, 3)
        .unwrap()
    );
1784
    // Fixture: three byte strings stored in a `BinaryArray`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || {
        BinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // Binary -> Binary: the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_binary_as_binary,
        DataType::Binary,
        perfectly_shredded_binary_variant_array,
        BinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );

    // Fixture: the same three byte strings stored in a `LargeBinaryArray`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_binary_variant_array, || {
        LargeBinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // LargeBinary -> LargeBinary: the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_large_binary_as_large_binary,
        DataType::LargeBinary,
        perfectly_shredded_large_binary_variant_array,
        LargeBinaryArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );

    // Fixture: the same three byte strings stored in a `BinaryViewArray`.
    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_view_variant_array, || {
        BinaryViewArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    });

    // BinaryView -> BinaryView: the expected array equals the fixture values.
    perfectly_shredded_to_arrow_primitive_test!(
        get_variant_perfectly_shredded_binary_view_as_binary_view,
        DataType::BinaryView,
        perfectly_shredded_binary_view_variant_array,
        BinaryViewArray::from(vec![
            Some(b"Apache" as &[u8]),
            Some(b"Arrow-rs" as &[u8]),
            Some(b"Parquet-variant" as &[u8]),
        ])
    );
1841
1842    /// Return a VariantArray that represents an "all null" variant
1843    /// for the following example (3 null values):
1844    ///
1845    /// ```text
1846    /// null
1847    /// null
1848    /// null
1849    /// ```
1850    ///
1851    /// The schema of the corresponding `StructArray` would look like this:
1852    ///
1853    /// ```text
1854    /// StructArray {
1855    ///   metadata: BinaryViewArray,
1856    /// }
1857    /// ```
1858    fn all_null_variant_array() -> ArrayRef {
1859        let nulls = NullBuffer::from(vec![
1860            false, // row 0 is null
1861            false, // row 1 is null
1862            false, // row 2 is null
1863        ]);
1864
1865        // metadata is the same for all rows (though they're all null)
1866        let metadata =
1867            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
1868
1869        ArrayRef::from(VariantArray::from_parts(
1870            Arc::new(metadata),
1871            None,
1872            None,
1873            Some(nulls),
1874        ))
1875    }
1876
1877    /// This test manually constructs a shredded variant array representing objects
1878    /// like {"x": 1, "y": "foo"} and {"x": 42} and tests extracting the "x" field
1879    /// as VariantArray using variant_get.
1880    #[test]
1881    fn test_shredded_object_field_access() {
1882        let array = shredded_object_with_x_field_variant_array();
1883
1884        // Test: Extract the "x" field as VariantArray first
1885        let options = GetOptions::new_with_path(VariantPath::try_from("x").unwrap());
1886        let result = variant_get(&array, options).unwrap();
1887
1888        let result_variant = VariantArray::try_new(&result).unwrap();
1889        assert_eq!(result_variant.len(), 2);
1890
1891        // Row 0: expect x=1
1892        assert_eq!(result_variant.value(0), Variant::Int32(1));
1893        // Row 1: expect x=42
1894        assert_eq!(result_variant.value(1), Variant::Int32(42));
1895    }
1896
1897    /// Test extracting shredded object field with type conversion
1898    #[test]
1899    fn test_shredded_object_field_as_int32() {
1900        let array = shredded_object_with_x_field_variant_array();
1901
1902        // Test: Extract the "x" field as Int32Array (type conversion)
1903        let field = Field::new("x", DataType::Int32, false);
1904        let options = GetOptions::new_with_path(VariantPath::try_from("x").unwrap())
1905            .with_as_type(Some(FieldRef::from(field)));
1906        let result = variant_get(&array, options).unwrap();
1907
1908        // Should get Int32Array
1909        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
1910        assert_eq!(&result, &expected);
1911    }
1912
1913    type ShreddedListLikeArrayGen = fn() -> ArrayRef;
1914    type ShreddedListLikeCase = (&'static str, ShreddedListLikeArrayGen);
1915
1916    fn shredded_list_like_cases() -> [ShreddedListLikeCase; 4] {
1917        [
1918            ("list", shredded_list_variant_array),
1919            ("large_list", shredded_large_list_variant_array),
1920            ("list_view", shredded_list_view_variant_array),
1921            ("large_list_view", shredded_large_list_view_variant_array),
1922        ]
1923    }
1924
1925    #[test]
1926    fn test_shredded_list_like_index_access_from_value_field() {
1927        let options = GetOptions::new_with_path(VariantPath::from(1));
1928
1929        for (case, array_gen) in shredded_list_like_cases() {
1930            let array = array_gen();
1931            let result = variant_get(&array, options.clone()).unwrap();
1932            let result_variant = VariantArray::try_new(&result).unwrap();
1933
1934            assert_eq!(result_variant.value(0), Variant::from("drama"), "{case}");
1935            assert_eq!(result_variant.value(1).as_int64(), Some(123), "{case}");
1936        }
1937    }
1938
1939    #[test]
1940    fn test_shredded_list_like_index_out_of_bounds_unsafe_cast_returns_null() {
1941        let options =
1942            GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
1943                safe: false,
1944                ..Default::default()
1945            });
1946
1947        for (case, array_gen) in shredded_list_like_cases() {
1948            let result = variant_get(&array_gen(), options.clone()).unwrap();
1949            let result_variant = VariantArray::try_new(&result).unwrap();
1950            assert_eq!(result_variant.value(0), Variant::Null, "{case}");
1951            assert_eq!(result_variant.value(1), Variant::Null, "{case}");
1952        }
1953    }
1954
1955    /// Test extracting shredded list-like field with type conversion.
1956    #[test]
1957    fn test_shredded_list_like_as_string() {
1958        let field = Field::new("typed_value", DataType::Utf8, false);
1959        let options = GetOptions::new_with_path(VariantPath::from(0))
1960            .with_as_type(Some(FieldRef::from(field)));
1961        let expected: ArrayRef = Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
1962
1963        for (case, array_gen) in shredded_list_like_cases() {
1964            let result = variant_get(&array_gen(), options.clone()).unwrap();
1965            assert_eq!(&result, &expected, "{case}");
1966        }
1967    }
1968
1969    #[test]
1970    fn test_shredded_list_like_index_access_from_value_field_as_int64() {
1971        let field = Field::new("typed_value", DataType::Int64, true);
1972        let options = GetOptions::new_with_path(VariantPath::from(1))
1973            .with_as_type(Some(FieldRef::from(field)));
1974        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, Some(123)]));
1975
1976        for (case, array_gen) in shredded_list_like_cases() {
1977            let result = variant_get(&array_gen(), options.clone()).unwrap();
1978            // "drama" -> NULL, 123 -> 123.
1979            assert_eq!(&result, &expected, "{case}");
1980        }
1981    }
1982
1983    #[test]
1984    fn test_shredded_list_in_struct_index_access() {
1985        let array = shredded_struct_with_list_variant_array();
1986        let options = GetOptions::new_with_path(VariantPath::try_from("a[1]").unwrap());
1987        let result = variant_get(&array, options).unwrap();
1988        let result_variant = VariantArray::try_new(&result).unwrap();
1989
1990        assert_eq!(result_variant.value(0), Variant::from("drama"));
1991        assert_eq!(result_variant.value(1).as_int64(), Some(123));
1992    }
1993
1994    #[test]
1995    fn test_shredded_struct_in_list_field_access() {
1996        let array = shredded_list_of_struct_variant_array();
1997        let field = Field::new("x", DataType::Int32, true);
1998        let path = VariantPath::from(0).join("x");
1999        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2000        let result = variant_get(&array, options).unwrap();
2001
2002        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(3)]));
2003        assert_eq!(&result, &expected);
2004    }
2005
2006    #[test]
2007    fn test_shredded_list_of_lists_index_access() {
2008        let array = shredded_list_of_lists_variant_array();
2009        let path = VariantPath::from(0).join(1);
2010
2011        let result = variant_get(&array, GetOptions::new_with_path(path.clone())).unwrap();
2012        let result_variant = VariantArray::try_new(&result).unwrap();
2013        assert_eq!(result_variant.value(0), Variant::from("b"));
2014        assert_eq!(result_variant.value(1).as_int64(), Some(123));
2015
2016        let field = Field::new("typed_value", DataType::Int64, true);
2017        let casted = variant_get(
2018            &array,
2019            GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))),
2020        )
2021        .unwrap();
2022        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, Some(123)]));
2023        assert_eq!(&casted, &expected);
2024    }
2025
2026    /// Helper to create a shredded list-like variant array used by list index tests.
2027    ///
2028    /// Rows:
2029    /// 1. `["comedy", "drama"]` (fully shred-able as `Utf8`)
2030    /// 2. `["horror", 123]` (partially shredded, with fallback for the numeric element)
2031    fn shredded_list_like_variant_array(list_schema: DataType) -> ArrayRef {
2032        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
2033            Some(r#"["comedy", "drama"]"#),
2034            Some(r#"["horror", 123]"#),
2035        ]));
2036        let input = json_to_variant(&json_rows).unwrap();
2037
2038        let shredded = shred_variant(&input, &list_schema).unwrap();
2039        ArrayRef::from(shredded)
2040    }
2041
2042    fn shredded_list_of_lists_variant_array() -> ArrayRef {
2043        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
2044            Some(r#"[["a", "b"], ["c", "d"]]"#),
2045            Some(r#"[["x", 123], ["y", "z"]]"#),
2046        ]));
2047        let input = json_to_variant(&json_rows).unwrap();
2048
2049        let inner_list = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
2050        let outer_list = DataType::List(Arc::new(Field::new("item", inner_list, true)));
2051        let shredded = shred_variant(&input, &outer_list).unwrap();
2052        ArrayRef::from(shredded)
2053    }
2054
2055    fn shredded_list_variant_array() -> ArrayRef {
2056        shredded_list_like_variant_array(DataType::List(Arc::new(Field::new(
2057            "item",
2058            DataType::Utf8,
2059            true,
2060        ))))
2061    }
2062
2063    fn shredded_large_list_variant_array() -> ArrayRef {
2064        shredded_list_like_variant_array(DataType::LargeList(Arc::new(Field::new(
2065            "item",
2066            DataType::Utf8,
2067            true,
2068        ))))
2069    }
2070
2071    fn shredded_list_view_variant_array() -> ArrayRef {
2072        shredded_list_like_variant_array(DataType::ListView(Arc::new(Field::new(
2073            "item",
2074            DataType::Utf8,
2075            true,
2076        ))))
2077    }
2078
2079    fn shredded_large_list_view_variant_array() -> ArrayRef {
2080        shredded_list_like_variant_array(DataType::LargeListView(Arc::new(Field::new(
2081            "item",
2082            DataType::Utf8,
2083            true,
2084        ))))
2085    }
2086
2087    fn shredded_struct_with_list_variant_array() -> ArrayRef {
2088        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
2089            Some(r#"{"a": ["comedy", "drama"]}"#),
2090            Some(r#"{"a": ["horror", 123]}"#),
2091        ]));
2092        let input = json_to_variant(&json_rows).unwrap();
2093
2094        let list_schema = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
2095        let shredding_schema = ShreddedSchemaBuilder::default()
2096            .with_path("a", &list_schema)
2097            .unwrap()
2098            .build();
2099        let shredded = shred_variant(&input, &shredding_schema).unwrap();
2100        ArrayRef::from(shredded)
2101    }
2102
2103    fn shredded_list_of_struct_variant_array() -> ArrayRef {
2104        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
2105            Some(r#"[{"x": 1}, {"x": 2}]"#),
2106            Some(r#"[{"x": 3}, {"y": 4}]"#),
2107        ]));
2108        let input = json_to_variant(&json_rows).unwrap();
2109
2110        let struct_type =
2111            DataType::Struct(Fields::from(vec![Field::new("x", DataType::Int32, true)]));
2112        let list_schema = DataType::List(Arc::new(Field::new("item", struct_type, true)));
2113        let shredded = shred_variant(&input, &list_schema).unwrap();
2114        ArrayRef::from(shredded)
2115    }
2116
2117    /// Helper function to create a shredded variant array representing objects
2118    ///
2119    /// This creates an array that represents:
2120    /// Row 0: {"x": 1, "y": "foo"}  (x is shredded, y is in value field)
2121    /// Row 1: {"x": 42}             (x is shredded, perfect shredding)
2122    ///
2123    /// The physical layout follows the shredding spec where:
2124    /// - metadata: contains object metadata
2125    /// - typed_value: StructArray with field "x" (ShreddedVariantFieldArray)
2126    /// - value: contains fallback for unshredded fields like {"y": "foo"}
2127    /// - The "x" field has typed_value=Int32Array and value=NULL (perfect shredding)
2128    fn shredded_object_with_x_field_variant_array() -> ArrayRef {
2129        // Create the base metadata for objects
2130        let (metadata, y_field_value) = {
2131            let mut builder = parquet_variant::VariantBuilder::new();
2132            let mut obj = builder.new_object();
2133            obj.insert("x", Variant::Int32(42));
2134            obj.insert("y", Variant::from("foo"));
2135            obj.finish();
2136            builder.finish()
2137        };
2138
2139        // Create metadata array (same for both rows)
2140        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2141
2142        // Create the main value field per the 3-step shredding spec:
2143        // Step 2: If field not in shredding schema, check value field
2144        // Row 0: {"y": "foo"} (y is not shredded, stays in value for step 2)
2145        // Row 1: {} (empty object - no unshredded fields)
2146        let empty_object_value = {
2147            let mut builder = parquet_variant::VariantBuilder::new();
2148            let obj = builder.new_object();
2149            obj.finish();
2150            let (_, value) = builder.finish();
2151            value
2152        };
2153
2154        let value_array = BinaryViewArray::from(vec![
2155            Some(y_field_value.as_slice()),      // Row 0 has {"y": "foo"}
2156            Some(empty_object_value.as_slice()), // Row 1 has {}
2157        ]);
2158
2159        // Create the "x" field as a ShreddedVariantFieldArray
2160        // This represents the shredded Int32 values for the "x" field
2161        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
2162
2163        // For perfect shredding of the x field, no "value" column, only typed_value
2164        let x_field_shredded = ShreddedVariantFieldArray::from_parts(
2165            None,
2166            Some(Arc::new(x_field_typed_value) as ArrayRef),
2167            None,
2168        );
2169
2170        // Create the main typed_value as a struct containing the "x" field
2171        let typed_value_fields = Fields::from(vec![Field::new(
2172            "x",
2173            x_field_shredded.data_type().clone(),
2174            true,
2175        )]);
2176        let typed_value_struct = StructArray::try_new(
2177            typed_value_fields,
2178            vec![ArrayRef::from(x_field_shredded)],
2179            None, // No nulls - both rows have the object structure
2180        )
2181        .unwrap();
2182
2183        // Create the main VariantArray
2184        ArrayRef::from(VariantArray::from_parts(
2185            Arc::new(metadata_array),
2186            Some(Arc::new(value_array)),
2187            Some(Arc::new(typed_value_struct)),
2188            None,
2189        ))
2190    }
2191
2192    /// Simple test to check if nested paths are supported by current implementation
2193    #[test]
2194    fn test_simple_nested_path_support() {
2195        // Check: How does VariantPath parse different strings?
2196        println!("Testing path parsing:");
2197
2198        let path_x = VariantPath::try_from("x").unwrap();
2199        let elements_x: Vec<_> = path_x.iter().collect();
2200        println!("  'x' -> {} elements: {:?}", elements_x.len(), elements_x);
2201
2202        let path_ax = VariantPath::try_from("a.x").unwrap();
2203        let elements_ax: Vec<_> = path_ax.iter().collect();
2204        println!(
2205            "  'a.x' -> {} elements: {:?}",
2206            elements_ax.len(),
2207            elements_ax
2208        );
2209
2210        let path_ax_alt = VariantPath::try_from("$.a.x").unwrap();
2211        let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect();
2212        println!(
2213            "  '$.a.x' -> {} elements: {:?}",
2214            elements_ax_alt.len(),
2215            elements_ax_alt
2216        );
2217
2218        let path_nested = VariantPath::try_from("a").unwrap().join("x");
2219        let elements_nested: Vec<_> = path_nested.iter().collect();
2220        println!(
2221            "  VariantPath::try_from('a').unwrap().join('x') -> {} elements: {:?}",
2222            elements_nested.len(),
2223            elements_nested
2224        );
2225
2226        // Use your existing simple test data but try "a.x" instead of "x"
2227        let array = shredded_object_with_x_field_variant_array();
2228
2229        // Test if variant_get with REAL nested path throws not implemented error
2230        let real_nested_path = VariantPath::try_from("a").unwrap().join("x");
2231        let options = GetOptions::new_with_path(real_nested_path);
2232        let result = variant_get(&array, options);
2233
2234        match result {
2235            Ok(_) => {
2236                println!("Nested path 'a.x' works unexpectedly!");
2237            }
2238            Err(e) => {
2239                println!("Nested path 'a.x' error: {}", e);
2240                if e.to_string().contains("Not yet implemented")
2241                    || e.to_string().contains("NotYetImplemented")
2242                {
2243                    println!("This is expected - nested paths are not implemented");
2244                    return;
2245                }
2246                // Any other error is also expected for now
2247                println!("This shows nested paths need implementation");
2248            }
2249        }
2250    }
2251
2252    /// Test comprehensive variant_get scenarios with Int32 conversion
2253    /// Test depth 0: Direct field access "x" with Int32 conversion
2254    /// Covers shredded vs non-shredded VariantArrays for simple field access
2255    #[test]
2256    fn test_depth_0_int32_conversion() {
2257        println!("=== Testing Depth 0: Direct field access ===");
2258
2259        // Non-shredded test data: [{"x": 42}, {"x": "foo"}, {"y": 10}]
2260        let unshredded_array = create_depth_0_test_data();
2261
2262        let field = Field::new("result", DataType::Int32, true);
2263        let path = VariantPath::try_from("x").unwrap();
2264        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2265        let result = variant_get(&unshredded_array, options).unwrap();
2266
2267        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2268            Some(42), // {"x": 42} -> 42
2269            None,     // {"x": "foo"} -> NULL (type mismatch)
2270            None,     // {"y": 10} -> NULL (field missing)
2271        ]));
2272        assert_eq!(&result, &expected);
2273        println!("Depth 0 (unshredded) passed");
2274
2275        // Shredded test data: using simplified approach based on working pattern
2276        let shredded_array = create_depth_0_shredded_test_data_simple();
2277
2278        let field = Field::new("result", DataType::Int32, true);
2279        let path = VariantPath::try_from("x").unwrap();
2280        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2281        let result = variant_get(&shredded_array, options).unwrap();
2282
2283        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2284            Some(42), // {"x": 42} -> 42 (from typed_value)
2285            None,     // {"x": "foo"} -> NULL (type mismatch, from value field)
2286        ]));
2287        assert_eq!(&result, &expected);
2288        println!("Depth 0 (shredded) passed");
2289    }
2290
2291    /// Test depth 1: Single nested field access "a.x" with Int32 conversion
2292    /// Covers shredded vs non-shredded VariantArrays for nested field access
2293    #[test]
2294    fn test_depth_1_int32_conversion() {
2295        println!("=== Testing Depth 1: Single nested field access ===");
2296
2297        // Non-shredded test data from the GitHub issue
2298        let unshredded_array = create_nested_path_test_data();
2299
2300        let field = Field::new("result", DataType::Int32, true);
2301        let path = VariantPath::try_from("a.x").unwrap(); // Dot notation!
2302        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2303        let result = variant_get(&unshredded_array, options).unwrap();
2304
2305        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2306            Some(55), // {"a": {"x": 55}} -> 55
2307            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch)
2308        ]));
2309        assert_eq!(&result, &expected);
2310        println!("Depth 1 (unshredded) passed");
2311
2312        // Shredded test data: depth 1 nested shredding
2313        let shredded_array = create_depth_1_shredded_test_data_working();
2314
2315        let field = Field::new("result", DataType::Int32, true);
2316        let path = VariantPath::try_from("a.x").unwrap(); // Dot notation!
2317        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2318        let result = variant_get(&shredded_array, options).unwrap();
2319
2320        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2321            Some(55), // {"a": {"x": 55}} -> 55 (from nested shredded x)
2322            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch in nested value)
2323        ]));
2324        assert_eq!(&result, &expected);
2325        println!("Depth 1 (shredded) passed");
2326    }
2327
2328    /// Test depth 2: Double nested field access "a.b.x" with Int32 conversion  
2329    /// Covers shredded vs non-shredded VariantArrays for deeply nested field access
2330    #[test]
2331    fn test_depth_2_int32_conversion() {
2332        println!("=== Testing Depth 2: Double nested field access ===");
2333
2334        // Non-shredded test data: [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
2335        let unshredded_array = create_depth_2_test_data();
2336
2337        let field = Field::new("result", DataType::Int32, true);
2338        let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested dot notation!
2339        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2340        let result = variant_get(&unshredded_array, options).unwrap();
2341
2342        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2343            Some(100), // {"a": {"b": {"x": 100}}} -> 100
2344            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch)
2345            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing)
2346        ]));
2347        assert_eq!(&result, &expected);
2348        println!("Depth 2 (unshredded) passed");
2349
2350        // Shredded test data: depth 2 nested shredding
2351        let shredded_array = create_depth_2_shredded_test_data_working();
2352
2353        let field = Field::new("result", DataType::Int32, true);
2354        let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested dot notation!
2355        let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
2356        let result = variant_get(&shredded_array, options).unwrap();
2357
2358        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
2359            Some(100), // {"a": {"b": {"x": 100}}} -> 100 (from deeply nested shredded x)
2360            None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch in deep value)
2361            None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing in deep structure)
2362        ]));
2363        assert_eq!(&result, &expected);
2364        println!("Depth 2 (shredded) passed");
2365    }
2366
2367    /// Test that demonstrates what CURRENTLY WORKS
2368    ///
2369    /// This shows that nested path functionality does work, but only when the
2370    /// test data matches what the current implementation expects
2371    #[test]
2372    fn test_current_nested_path_functionality() {
2373        let array = shredded_object_with_x_field_variant_array();
2374
2375        // Test: Extract the "x" field (single level) - this works
2376        let single_path = VariantPath::try_from("x").unwrap();
2377        let field = Field::new("result", DataType::Int32, true);
2378        let options =
2379            GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field)));
2380        let result = variant_get(&array, options).unwrap();
2381
2382        println!("Single path 'x' works - result: {:?}", result);
2383
2384        // Test: Try nested path "a.x" - this is what we need to implement
2385        let nested_path = VariantPath::try_from("a").unwrap().join("x");
2386        let field = Field::new("result", DataType::Int32, true);
2387        let options =
2388            GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field)));
2389        let result = variant_get(&array, options).unwrap();
2390
2391        println!("Nested path 'a.x' result: {:?}", result);
2392    }
2393
2394    #[test]
2395    fn test_variant_get_as_variant_from_unshredded_input() {
2396        let (unshredded, _) = create_variant_get_as_variant_test_data();
2397        let unshredded_field = VariantArray::try_new(&unshredded).unwrap().field("result");
2398        assert_variant_field_extraction_returns_unshredded_variant(&unshredded, &unshredded_field);
2399    }
2400
2401    #[test]
2402    fn test_variant_get_as_variant_from_shredded_input() {
2403        let (unshredded, shredded) = create_variant_get_as_variant_test_data();
2404        let unshredded_field = VariantArray::try_new(&unshredded).unwrap().field("result");
2405        assert_variant_field_extraction_returns_unshredded_variant(&shredded, &unshredded_field);
2406    }
2407
2408    #[test]
2409    fn test_variant_get_as_shredded_variant_is_not_yet_supported() {
2410        let (_, shredded) = create_variant_get_as_variant_test_data();
2411        // Deriving the request field from the shredded array yields a `VariantType` field whose
2412        // struct carries a `typed_value` -- a request to shred the output. That is unsupported
2413        // (https://github.com/apache/arrow-rs/issues/8153) and must error, not silently return a
2414        // plain binary variant.
2415        let shredded_field = VariantArray::try_new(&shredded).unwrap().field("result");
2416        assert!(requested_field_is_shredded(Some(&shredded_field)));
2417
2418        let options = GetOptions::new_with_path(VariantPath::try_from("field_name").unwrap())
2419            .with_as_type(Some(FieldRef::from(shredded_field)));
2420        let err = variant_get(&shredded, options).unwrap_err();
2421        assert!(
2422            matches!(err, ArrowError::NotYetImplemented(_)),
2423            "expected NotYetImplemented, got {err:?}"
2424        );
2425    }
2426
2427    fn create_variant_get_as_variant_test_data() -> (ArrayRef, ArrayRef) {
2428        let input_json: ArrayRef = Arc::new(StringArray::from(vec![
2429            Some(r#"{"field_name": {"k": 100000}}"#),
2430            Some(r#"{"field_name": {"k": "s"}}"#),
2431        ]));
2432
2433        let unshredded = ArrayRef::from(json_to_variant(&input_json).unwrap());
2434        let unshredded_variant = VariantArray::try_new(&unshredded).unwrap();
2435
2436        let as_type = DataType::Struct(Fields::from(vec![Field::new(
2437            "field_name",
2438            DataType::Struct(Fields::from(vec![Field::new("k", DataType::Int32, true)])),
2439            true,
2440        )]));
2441        let shredded = ArrayRef::from(shred_variant(&unshredded_variant, &as_type).unwrap());
2442
2443        (unshredded, shredded)
2444    }
2445
2446    fn assert_variant_field_extraction_returns_unshredded_variant(
2447        input: &ArrayRef,
2448        variant_field: &Field,
2449    ) {
2450        let options = GetOptions::new_with_path(VariantPath::try_from("field_name").unwrap())
2451            .with_as_type(Some(FieldRef::from(variant_field.clone())));
2452
2453        let result = variant_get(input, options).unwrap();
2454        let result_variant = VariantArray::try_new(&result).unwrap();
2455
2456        assert!(result_variant.typed_value_column().is_none());
2457        assert!(result_variant.value_column().is_some());
2458
2459        let expected_json: ArrayRef = Arc::new(StringArray::from(vec![
2460            Some(r#"{"k":100000}"#),
2461            Some(r#"{"k":"s"}"#),
2462        ]));
2463        let expected = json_to_variant(&expected_json).unwrap();
2464
2465        assert_eq!(result_variant.len(), expected.len());
2466        for i in 0..result_variant.len() {
2467            assert_eq!(result_variant.is_null(i), expected.is_null(i));
2468            if !result_variant.is_null(i) {
2469                assert_eq!(result_variant.value(i), expected.value(i));
2470            }
2471        }
2472    }
2473
2474    /// Create test data for depth 0 (direct field access)
2475    /// [{"x": 42}, {"x": "foo"}, {"y": 10}]
2476    fn create_depth_0_test_data() -> ArrayRef {
2477        let mut builder = crate::VariantArrayBuilder::new(3);
2478
2479        // Row 1: {"x": 42}
2480        {
2481            let json_str = r#"{"x": 42}"#;
2482            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2483            if let Ok(variant_array) = json_to_variant(&string_array) {
2484                builder.append_variant(variant_array.value(0));
2485            } else {
2486                builder.append_null();
2487            }
2488        }
2489
2490        // Row 2: {"x": "foo"}
2491        {
2492            let json_str = r#"{"x": "foo"}"#;
2493            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2494            if let Ok(variant_array) = json_to_variant(&string_array) {
2495                builder.append_variant(variant_array.value(0));
2496            } else {
2497                builder.append_null();
2498            }
2499        }
2500
2501        // Row 3: {"y": 10} (missing "x" field)
2502        {
2503            let json_str = r#"{"y": 10}"#;
2504            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2505            if let Ok(variant_array) = json_to_variant(&string_array) {
2506                builder.append_variant(variant_array.value(0));
2507            } else {
2508                builder.append_null();
2509            }
2510        }
2511
2512        ArrayRef::from(builder.build())
2513    }
2514
2515    /// Create test data for depth 1 (single nested field)
2516    /// This represents the exact scenarios from the GitHub issue: "a.x"
2517    fn create_nested_path_test_data() -> ArrayRef {
2518        let mut builder = crate::VariantArrayBuilder::new(2);
2519
2520        // Row 1: {"a": {"x": 55}, "b": 42}
2521        {
2522            let json_str = r#"{"a": {"x": 55}, "b": 42}"#;
2523            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2524            if let Ok(variant_array) = json_to_variant(&string_array) {
2525                builder.append_variant(variant_array.value(0));
2526            } else {
2527                builder.append_null();
2528            }
2529        }
2530
2531        // Row 2: {"a": {"x": "foo"}, "b": 42}
2532        {
2533            let json_str = r#"{"a": {"x": "foo"}, "b": 42}"#;
2534            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2535            if let Ok(variant_array) = json_to_variant(&string_array) {
2536                builder.append_variant(variant_array.value(0));
2537            } else {
2538                builder.append_null();
2539            }
2540        }
2541
2542        ArrayRef::from(builder.build())
2543    }
2544
2545    /// Create test data for depth 2 (double nested field)
2546    /// [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
2547    fn create_depth_2_test_data() -> ArrayRef {
2548        let mut builder = crate::VariantArrayBuilder::new(3);
2549
2550        // Row 1: {"a": {"b": {"x": 100}}}
2551        {
2552            let json_str = r#"{"a": {"b": {"x": 100}}}"#;
2553            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2554            if let Ok(variant_array) = json_to_variant(&string_array) {
2555                builder.append_variant(variant_array.value(0));
2556            } else {
2557                builder.append_null();
2558            }
2559        }
2560
2561        // Row 2: {"a": {"b": {"x": "bar"}}}
2562        {
2563            let json_str = r#"{"a": {"b": {"x": "bar"}}}"#;
2564            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2565            if let Ok(variant_array) = json_to_variant(&string_array) {
2566                builder.append_variant(variant_array.value(0));
2567            } else {
2568                builder.append_null();
2569            }
2570        }
2571
2572        // Row 3: {"a": {"b": {"y": 200}}} (missing "x" field)
2573        {
2574            let json_str = r#"{"a": {"b": {"y": 200}}}"#;
2575            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
2576            if let Ok(variant_array) = json_to_variant(&string_array) {
2577                builder.append_variant(variant_array.value(0));
2578            } else {
2579                builder.append_null();
2580            }
2581        }
2582
2583        ArrayRef::from(builder.build())
2584    }
2585
2586    /// Create simple shredded test data for depth 0 using a simplified working pattern
2587    /// Creates 2 rows: [{"x": 42}, {"x": "foo"}] with "x" shredded where possible
2588    fn create_depth_0_shredded_test_data_simple() -> ArrayRef {
2589        // Create base metadata using the working pattern
2590        let (metadata, string_x_value) = {
2591            let mut builder = parquet_variant::VariantBuilder::new();
2592            let mut obj = builder.new_object();
2593            obj.insert("x", Variant::from("foo"));
2594            obj.finish();
2595            builder.finish()
2596        };
2597
2598        // Metadata array (same for both rows)
2599        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2600
2601        // Value array following the 3-step shredding spec:
2602        // Row 0: {} (x is shredded, no unshredded fields)
2603        // Row 1: {"x": "foo"} (x is a string, can't be shredded to Int32)
2604        let empty_object_value = {
2605            let mut builder = parquet_variant::VariantBuilder::new();
2606            let obj = builder.new_object();
2607            obj.finish();
2608            let (_, value) = builder.finish();
2609            value
2610        };
2611
2612        let value_array = BinaryViewArray::from(vec![
2613            Some(empty_object_value.as_slice()), // Row 0: {} (x shredded out)
2614            Some(string_x_value.as_slice()),     // Row 1: {"x": "foo"} (fallback)
2615        ]);
2616
2617        // Create the "x" field as a ShreddedVariantFieldArray
2618        let x_field_typed_value = Int32Array::from(vec![Some(42), None]);
2619
2620        // For the x field, only typed_value (perfect shredding when possible)
2621        let x_field_shredded = ShreddedVariantFieldArray::from_parts(
2622            None,
2623            Some(Arc::new(x_field_typed_value) as ArrayRef),
2624            None,
2625        );
2626
2627        // Create the main typed_value as a struct containing the "x" field
2628        let typed_value_fields = Fields::from(vec![Field::new(
2629            "x",
2630            x_field_shredded.data_type().clone(),
2631            true,
2632        )]);
2633        let typed_value_struct = StructArray::try_new(
2634            typed_value_fields,
2635            vec![ArrayRef::from(x_field_shredded)],
2636            None,
2637        )
2638        .unwrap();
2639
2640        // Build final VariantArray
2641        ArrayRef::from(VariantArray::from_parts(
2642            Arc::new(metadata_array),
2643            Some(Arc::new(value_array)),
2644            Some(Arc::new(typed_value_struct)),
2645            None,
2646        ))
2647    }
2648
2649    /// Create working depth 1 shredded test data based on the existing working pattern
2650    /// This creates a properly structured shredded variant for "a.x" where:
2651    /// - Row 0: {"a": {"x": 55}, "b": 42} with a.x shredded into typed_value
2652    /// - Row 1: {"a": {"x": "foo"}, "b": 42} with a.x fallback to value field due to type mismatch
2653    fn create_depth_1_shredded_test_data_working() -> ArrayRef {
2654        // Create metadata following the working pattern from shredded_object_with_x_field_variant_array
2655        let (metadata, _) = {
2656            // Create nested structure: {"a": {"x": 55}, "b": 42}
2657            let mut builder = parquet_variant::VariantBuilder::new();
2658            let mut obj = builder.new_object();
2659
2660            // Create the nested "a" object
2661            let mut a_obj = obj.new_object("a");
2662            a_obj.insert("x", Variant::Int32(55));
2663            a_obj.finish();
2664
2665            obj.insert("b", Variant::Int32(42));
2666            obj.finish();
2667            builder.finish()
2668        };
2669
2670        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
2671
2672        // Create value arrays for the fallback case
2673        // Following the spec: if field cannot be shredded, it stays in value
2674        let empty_object_value = {
2675            let mut builder = parquet_variant::VariantBuilder::new();
2676            let obj = builder.new_object();
2677            obj.finish();
2678            let (_, value) = builder.finish();
2679            value
2680        };
2681
2682        // Row 1 fallback: use the working pattern from the existing shredded test
2683        // This avoids metadata issues by using the simple fallback approach
2684        let row1_fallback = {
2685            let mut builder = parquet_variant::VariantBuilder::new();
2686            let mut obj = builder.new_object();
2687            obj.insert("fallback", Variant::from("data"));
2688            obj.finish();
2689            let (_, value) = builder.finish();
2690            value
2691        };
2692
2693        let value_array = BinaryViewArray::from(vec![
2694            Some(empty_object_value.as_slice()), // Row 0: {} (everything shredded except b in unshredded fields)
2695            Some(row1_fallback.as_slice()), // Row 1: {"a": {"x": "foo"}, "b": 42} (a.x can't be shredded)
2696        ]);
2697
2698        // Create the nested shredded structure
2699        // Level 2: x field (the deepest level)
2700        let x_typed_value = Int32Array::from(vec![Some(55), None]);
2701        let x_field_shredded = ShreddedVariantFieldArray::from_parts(
2702            None,
2703            Some(Arc::new(x_typed_value) as ArrayRef),
2704            None,
2705        );
2706
2707        // Level 1: a field containing x field + value field for fallbacks
2708        // The "a" field needs both typed_value (for shredded x) and value (for fallback cases)
2709
2710        // Create the value field for "a" (for cases where a.x can't be shredded)
2711        let a_value_data = {
2712            let mut builder = parquet_variant::VariantBuilder::new();
2713            let obj = builder.new_object();
2714            obj.finish();
2715            let (_, value) = builder.finish();
2716            value
2717        };
2718        let a_value_array = BinaryViewArray::from(vec![
2719            None,                          // Row 0: x is shredded, so no value fallback needed
2720            Some(a_value_data.as_slice()), // Row 1: fallback for a.x="foo" (but logic will check typed_value first)
2721        ]);
2722
2723        let a_inner_fields = Fields::from(vec![Field::new(
2724            "x",
2725            x_field_shredded.data_type().clone(),
2726            true,
2727        )]);
2728        let a_inner_typed_value = Arc::new(
2729            StructArray::try_new(a_inner_fields, vec![ArrayRef::from(x_field_shredded)], None)
2730                .unwrap(),
2731        ) as ArrayRef;
2732        let a_field_shredded = ShreddedVariantFieldArray::from_parts(
2733            Some(Arc::new(a_value_array)),
2734            Some(a_inner_typed_value),
2735            None,
2736        );
2737
2738        // Level 0: main typed_value struct containing a field
2739        let typed_value_fields = Fields::from(vec![Field::new(
2740            "a",
2741            a_field_shredded.data_type().clone(),
2742            true,
2743        )]);
2744        let typed_value_struct = StructArray::try_new(
2745            typed_value_fields,
2746            vec![ArrayRef::from(a_field_shredded)],
2747            None,
2748        )
2749        .unwrap();
2750
2751        // Build final VariantArray
2752        ArrayRef::from(VariantArray::from_parts(
2753            Arc::new(metadata_array),
2754            Some(Arc::new(value_array)),
2755            Some(Arc::new(typed_value_struct)),
2756            None,
2757        ))
2758    }
2759
2760    /// Create working depth 2 shredded test data for "a.b.x" paths
2761    /// This creates a 3-level nested shredded structure where:
2762    /// - Row 0: {"a": {"b": {"x": 100}}} with a.b.x shredded into typed_value
2763    /// - Row 1: {"a": {"b": {"x": "bar"}}} with type mismatch fallback
2764    /// - Row 2: {"a": {"b": {"y": 200}}} with missing field fallback
2765    fn create_depth_2_shredded_test_data_working() -> ArrayRef {
2766        // Create metadata following the working pattern
2767        let (metadata, _) = {
2768            // Create deeply nested structure: {"a": {"b": {"x": 100}}}
2769            let mut builder = parquet_variant::VariantBuilder::new();
2770            let mut obj = builder.new_object();
2771
2772            // Create the nested "a.b" structure
2773            let mut a_obj = obj.new_object("a");
2774            let mut b_obj = a_obj.new_object("b");
2775            b_obj.insert("x", Variant::Int32(100));
2776            b_obj.finish();
2777            a_obj.finish();
2778
2779            obj.finish();
2780            builder.finish()
2781        };
2782
2783        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
2784
2785        // Create value arrays for fallback cases
2786        let empty_object_value = {
2787            let mut builder = parquet_variant::VariantBuilder::new();
2788            let obj = builder.new_object();
2789            obj.finish();
2790            let (_, value) = builder.finish();
2791            value
2792        };
2793
2794        // Simple fallback values - avoiding complex nested metadata
2795        let value_array = BinaryViewArray::from(vec![
2796            Some(empty_object_value.as_slice()), // Row 0: fully shredded
2797            Some(empty_object_value.as_slice()), // Row 1: fallback (simplified)
2798            Some(empty_object_value.as_slice()), // Row 2: fallback (simplified)
2799        ]);
2800
2801        // Create the deeply nested shredded structure: a.b.x
2802
2803        // Level 3: x field (deepest level)
2804        let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
2805        let x_field_shredded = ShreddedVariantFieldArray::from_parts(
2806            None,
2807            Some(Arc::new(x_typed_value) as ArrayRef),
2808            None,
2809        );
2810
2811        // Level 2: b field containing x field + value field
2812        let b_value_data = {
2813            let mut builder = parquet_variant::VariantBuilder::new();
2814            let obj = builder.new_object();
2815            obj.finish();
2816            let (_, value) = builder.finish();
2817            value
2818        };
2819        let b_value_array = BinaryViewArray::from(vec![
2820            None,                          // Row 0: x is shredded
2821            Some(b_value_data.as_slice()), // Row 1: fallback for b.x="bar"
2822            Some(b_value_data.as_slice()), // Row 2: fallback for b.y=200
2823        ]);
2824
2825        let b_inner_fields = Fields::from(vec![Field::new(
2826            "x",
2827            x_field_shredded.data_type().clone(),
2828            true,
2829        )]);
2830        let b_inner_typed_value = Arc::new(
2831            StructArray::try_new(b_inner_fields, vec![ArrayRef::from(x_field_shredded)], None)
2832                .unwrap(),
2833        ) as ArrayRef;
2834        let b_field_shredded = ShreddedVariantFieldArray::from_parts(
2835            Some(Arc::new(b_value_array)),
2836            Some(b_inner_typed_value),
2837            None,
2838        );
2839
2840        // Level 1: a field containing b field + value field
2841        let a_value_data = {
2842            let mut builder = parquet_variant::VariantBuilder::new();
2843            let obj = builder.new_object();
2844            obj.finish();
2845            let (_, value) = builder.finish();
2846            value
2847        };
2848        let a_value_array = BinaryViewArray::from(vec![
2849            None,                          // Row 0: b is shredded
2850            Some(a_value_data.as_slice()), // Row 1: fallback for a.b.*
2851            Some(a_value_data.as_slice()), // Row 2: fallback for a.b.*
2852        ]);
2853
2854        let a_inner_fields = Fields::from(vec![Field::new(
2855            "b",
2856            b_field_shredded.data_type().clone(),
2857            true,
2858        )]);
2859        let a_inner_typed_value = Arc::new(
2860            StructArray::try_new(a_inner_fields, vec![ArrayRef::from(b_field_shredded)], None)
2861                .unwrap(),
2862        ) as ArrayRef;
2863        let a_field_shredded = ShreddedVariantFieldArray::from_parts(
2864            Some(Arc::new(a_value_array)),
2865            Some(a_inner_typed_value),
2866            None,
2867        );
2868
2869        // Level 0: main typed_value struct containing a field
2870        let typed_value_fields = Fields::from(vec![Field::new(
2871            "a",
2872            a_field_shredded.data_type().clone(),
2873            true,
2874        )]);
2875        let typed_value_struct = StructArray::try_new(
2876            typed_value_fields,
2877            vec![ArrayRef::from(a_field_shredded)],
2878            None,
2879        )
2880        .unwrap();
2881
2882        // Build final VariantArray
2883        ArrayRef::from(VariantArray::from_parts(
2884            Arc::new(metadata_array),
2885            Some(Arc::new(value_array)),
2886            Some(Arc::new(typed_value_struct)),
2887            None,
2888        ))
2889    }
2890
2891    #[test]
2892    fn test_field_path_non_struct_returns_missing_path_step() {
2893        // Use the existing simple test data that has Int32 as typed_value
2894        let variant_array = perfectly_shredded_int32_variant_array();
2895
2896        for safe in [true, false] {
2897            let options = GetOptions {
2898                path: VariantPath::try_from("nonexistent_field").unwrap(),
2899                as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2900                cast_options: CastOptions {
2901                    safe,
2902                    ..Default::default()
2903                },
2904            };
2905
2906            let result_array = variant_get(&variant_array, options).unwrap();
2907            assert_eq!(result_array.len(), 3);
2908            assert!(result_array.is_null(0));
2909            assert!(result_array.is_null(1));
2910            assert!(result_array.is_null(2));
2911        }
2912    }
2913
2914    #[test]
2915    fn test_strict_cast_options_index_on_non_list_returns_null() {
2916        use arrow::compute::CastOptions;
2917        use arrow::datatypes::{DataType, Field};
2918        use parquet_variant::VariantPath;
2919        use std::sync::Arc;
2920
2921        // Use existing test data that has Int32 typed_value at the top level.
2922        let variant_array = perfectly_shredded_int32_variant_array();
2923        let options = GetOptions {
2924            path: VariantPath::from(0),
2925            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
2926            cast_options: CastOptions {
2927                safe: false,
2928                ..Default::default()
2929            },
2930        };
2931
2932        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
2933        let result = variant_get(&variant_array_ref, options).unwrap();
2934
2935        assert_eq!(result.len(), 3);
2936        assert!(result.is_null(0));
2937        assert!(result.is_null(1));
2938        assert!(result.is_null(2));
2939    }
2940
2941    #[test]
2942    fn test_error_message_boolean_type_display() {
2943        let mut builder = VariantArrayBuilder::new(1);
2944        builder.append_variant(Variant::from("abcd"));
2945        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2946
2947        // Request Boolean with strict casting to force an error
2948        let options = GetOptions {
2949            path: VariantPath::default(),
2950            as_type: Some(Arc::new(Field::new("result", DataType::Boolean, true))),
2951            cast_options: CastOptions {
2952                safe: false,
2953                ..Default::default()
2954            },
2955        };
2956
2957        let err = variant_get(&variant_array, options).unwrap_err();
2958        let msg = err.to_string();
2959        assert!(msg.contains("Failed to extract primitive of type Boolean"));
2960    }
2961
2962    #[test]
2963    fn test_error_message_numeric_type_display() {
2964        let mut builder = VariantArrayBuilder::new(1);
2965        builder.append_variant(Variant::from("abcd"));
2966        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2967
2968        // Request Float32 with strict casting to force an error
2969        let options = GetOptions {
2970            path: VariantPath::default(),
2971            as_type: Some(Arc::new(Field::new("result", DataType::Float32, true))),
2972            cast_options: CastOptions {
2973                safe: false,
2974                ..Default::default()
2975            },
2976        };
2977
2978        let err = variant_get(&variant_array, options).unwrap_err();
2979        let msg = err.to_string();
2980        assert!(msg.contains("Failed to extract primitive of type Float32"));
2981    }
2982
2983    #[test]
2984    fn test_error_message_temporal_type_display() {
2985        let mut builder = VariantArrayBuilder::new(1);
2986        builder.append_variant(Variant::BooleanFalse);
2987        let variant_array: ArrayRef = ArrayRef::from(builder.build());
2988
2989        // Request Timestamp with strict casting to force an error
2990        let options = GetOptions {
2991            path: VariantPath::default(),
2992            as_type: Some(Arc::new(Field::new(
2993                "result",
2994                DataType::Timestamp(TimeUnit::Nanosecond, None),
2995                true,
2996            ))),
2997            cast_options: CastOptions {
2998                safe: false,
2999                ..Default::default()
3000            },
3001        };
3002
3003        let err = variant_get(&variant_array, options).unwrap_err();
3004        let msg = err.to_string();
3005        assert!(msg.contains("Failed to extract primitive of type Timestamp(ns)"));
3006    }
3007
3008    #[test]
3009    fn test_null_buffer_union_for_shredded_paths() {
3010        // Test that null buffers are properly unioned when traversing shredded paths
3011        // This test verifies scovich's null buffer union requirement
3012
3013        // Create a depth-1 shredded variant array where:
3014        // - The top-level variant array has some nulls
3015        // - The nested typed_value also has some nulls
3016        // - The result should be the union of both null buffers
3017
3018        let variant_array = create_depth_1_shredded_test_data_working();
3019
3020        // Get the field "x" which should union nulls from:
3021        // 1. The top-level variant array nulls
3022        // 2. The "a" field's typed_value nulls
3023        // 3. The "x" field's typed_value nulls
3024        let options = GetOptions {
3025            path: VariantPath::try_from("a.x").unwrap(),
3026            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
3027            cast_options: CastOptions::default(),
3028        };
3029
3030        let result = variant_get(&variant_array, options).unwrap();
3031
3032        // Verify the result length matches input
3033        assert_eq!(result.len(), variant_array.len());
3034
3035        // The null pattern should reflect the union of all ancestor nulls
3036        // Row 0: Should have valid data (path exists and is shredded as Int32)
3037        // Row 1: Should be null (due to type mismatch - "foo" can't cast to Int32)
3038        assert!(!result.is_null(0), "Row 0 should have valid Int32 data");
3039        assert!(
3040            result.is_null(1),
3041            "Row 1 should be null due to type casting failure"
3042        );
3043
3044        // Verify the actual values
3045        let int32_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
3046        assert_eq!(int32_result.value(0), 55); // The valid Int32 value
3047    }
3048
3049    #[test]
3050    fn test_struct_null_mask_union_from_children() {
3051        // Test that struct null masks properly union nulls from children field extractions
3052        // This verifies scovich's concern about incomplete null masks in struct construction
3053
3054        // Create test data where some fields will fail type casting
3055        let json_strings = vec![
3056            r#"{"a": 42, "b": "hello"}"#, // Row 0: a=42 (castable to int), b="hello" (not castable to int)
3057            r#"{"a": "world", "b": 100}"#, // Row 1: a="world" (not castable to int), b=100 (castable to int)
3058            r#"{"a": 55, "b": 77}"#,       // Row 2: a=55 (castable to int), b=77 (castable to int)
3059        ];
3060
3061        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
3062        let variant_array = json_to_variant(&string_array).unwrap();
3063
3064        // Request extraction as a struct with both fields as Int32
3065        // This should create child arrays where some fields are null due to casting failures
3066        let struct_fields = Fields::from(vec![
3067            Field::new("a", DataType::Int32, true),
3068            Field::new("b", DataType::Int32, true),
3069        ]);
3070        let struct_type = DataType::Struct(struct_fields);
3071
3072        let options = GetOptions {
3073            path: VariantPath::default(), // Extract the whole object as struct
3074            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
3075            cast_options: CastOptions::default(),
3076        };
3077
3078        let variant_array_ref = ArrayRef::from(variant_array);
3079        let result = variant_get(&variant_array_ref, options).unwrap();
3080
3081        // Verify the result is a StructArray
3082        let struct_result = result.as_struct();
3083        assert_eq!(struct_result.len(), 3);
3084
3085        // Get the individual field arrays
3086        let field_a = struct_result
3087            .column(0)
3088            .as_any()
3089            .downcast_ref::<Int32Array>()
3090            .unwrap();
3091        let field_b = struct_result
3092            .column(1)
3093            .as_any()
3094            .downcast_ref::<Int32Array>()
3095            .unwrap();
3096
3097        // Verify field values and nulls
3098        // Row 0: a=42 (valid), b=null (casting failure)
3099        assert!(!field_a.is_null(0));
3100        assert_eq!(field_a.value(0), 42);
3101        assert!(field_b.is_null(0)); // "hello" can't cast to int
3102
3103        // Row 1: a=null (casting failure), b=100 (valid)
3104        assert!(field_a.is_null(1)); // "world" can't cast to int
3105        assert!(!field_b.is_null(1));
3106        assert_eq!(field_b.value(1), 100);
3107
3108        // Row 2: a=55 (valid), b=77 (valid)
3109        assert!(!field_a.is_null(2));
3110        assert_eq!(field_a.value(2), 55);
3111        assert!(!field_b.is_null(2));
3112        assert_eq!(field_b.value(2), 77);
3113
3114        // Verify the struct-level null mask properly unions child nulls
3115        // The struct should NOT be null in any row because each row has at least one valid field
3116        // (This tests that we're not incorrectly making the entire struct null when children fail)
3117        assert!(!struct_result.is_null(0)); // Has valid field 'a'
3118        assert!(!struct_result.is_null(1)); // Has valid field 'b'
3119        assert!(!struct_result.is_null(2)); // Has both valid fields
3120    }
3121
3122    #[test]
3123    fn test_field_nullability_preservation() {
3124        // Test that field nullability from GetOptions.as_type is preserved in the result
3125
3126        let json_strings = vec![
3127            r#"{"x": 42}"#,                  // Row 0: Valid int that should convert to Int32
3128            r#"{"x": "not_a_number"}"#,      // Row 1: String that can't cast to Int32
3129            r#"{"x": null}"#,                // Row 2: Explicit null value
3130            r#"{"x": "hello"}"#,             // Row 3: Another string (wrong type)
3131            r#"{"y": 100}"#,                 // Row 4: Missing "x" field (SQL NULL case)
3132            r#"{"x": 127}"#, // Row 5: Small int (could be Int8, widening cast candidate)
3133            r#"{"x": 32767}"#, // Row 6: Medium int (could be Int16, widening cast candidate)
3134            r#"{"x": 2147483647}"#, // Row 7: Max Int32 value (fits in Int32)
3135            r#"{"x": 9223372036854775807}"#, // Row 8: Large Int64 value (cannot convert to Int32)
3136        ];
3137
3138        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
3139        let variant_array = json_to_variant(&string_array).unwrap();
3140
3141        // Test 1: nullable field (should allow nulls from cast failures)
3142        let nullable_field = Arc::new(Field::new("result", DataType::Int32, true));
3143        let options_nullable = GetOptions {
3144            path: VariantPath::try_from("x").unwrap(),
3145            as_type: Some(nullable_field.clone()),
3146            cast_options: CastOptions::default(),
3147        };
3148
3149        let variant_array_ref = ArrayRef::from(variant_array);
3150        let result_nullable = variant_get(&variant_array_ref, options_nullable).unwrap();
3151
3152        // Verify we get an Int32Array with nulls for cast failures
3153        let int32_result = result_nullable
3154            .as_any()
3155            .downcast_ref::<Int32Array>()
3156            .unwrap();
3157        assert_eq!(int32_result.len(), 9);
3158
3159        // Row 0: 42 converts successfully to Int32
3160        assert!(!int32_result.is_null(0));
3161        assert_eq!(int32_result.value(0), 42);
3162
3163        // Row 1: "not_a_number" fails to convert -> NULL
3164        assert!(int32_result.is_null(1));
3165
3166        // Row 2: explicit null value -> NULL
3167        assert!(int32_result.is_null(2));
3168
3169        // Row 3: "hello" (wrong type) fails to convert -> NULL
3170        assert!(int32_result.is_null(3));
3171
3172        // Row 4: missing "x" field (SQL NULL case) -> NULL
3173        assert!(int32_result.is_null(4));
3174
3175        // Row 5: 127 (small int, potential Int8 -> Int32 widening)
3176        // Current behavior: JSON parses to Int8, should convert to Int32
3177        assert!(!int32_result.is_null(5));
3178        assert_eq!(int32_result.value(5), 127);
3179
3180        // Row 6: 32767 (medium int, potential Int16 -> Int32 widening)
3181        // Current behavior: JSON parses to Int16, should convert to Int32
3182        assert!(!int32_result.is_null(6));
3183        assert_eq!(int32_result.value(6), 32767);
3184
3185        // Row 7: 2147483647 (max Int32, fits exactly)
3186        // Current behavior: Should convert successfully
3187        assert!(!int32_result.is_null(7));
3188        assert_eq!(int32_result.value(7), 2147483647);
3189
3190        // Row 8: 9223372036854775807 (large Int64, cannot fit in Int32)
3191        // Current behavior: Should fail conversion -> NULL
3192        assert!(int32_result.is_null(8));
3193
3194        // Test 2: non-nullable field (behavior should be the same with safe casting)
3195        let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false));
3196        let options_non_nullable = GetOptions {
3197            path: VariantPath::try_from("x").unwrap(),
3198            as_type: Some(non_nullable_field.clone()),
3199            cast_options: CastOptions::default(), // safe=true by default
3200        };
3201
3202        // Create variant array again since we moved it
3203        let variant_array_2 = json_to_variant(&string_array).unwrap();
3204        let variant_array_ref_2 = ArrayRef::from(variant_array_2);
3205        let result_non_nullable = variant_get(&variant_array_ref_2, options_non_nullable).unwrap();
3206        let int32_result_2 = result_non_nullable
3207            .as_any()
3208            .downcast_ref::<Int32Array>()
3209            .unwrap();
3210
3211        // Even with a non-nullable field, safe casting should still produce nulls for failures
3212        assert_eq!(int32_result_2.len(), 9);
3213
3214        // Row 0: 42 converts successfully to Int32
3215        assert!(!int32_result_2.is_null(0));
3216        assert_eq!(int32_result_2.value(0), 42);
3217
3218        // Rows 1-4: All should be null due to safe casting behavior
3219        // (non-nullable field specification doesn't override safe casting behavior)
3220        assert!(int32_result_2.is_null(1)); // "not_a_number"
3221        assert!(int32_result_2.is_null(2)); // explicit null
3222        assert!(int32_result_2.is_null(3)); // "hello"
3223        assert!(int32_result_2.is_null(4)); // missing field
3224
3225        // Rows 5-7: These should also convert successfully (numeric widening/fitting)
3226        assert!(!int32_result_2.is_null(5)); // 127 (Int8 -> Int32)
3227        assert_eq!(int32_result_2.value(5), 127);
3228        assert!(!int32_result_2.is_null(6)); // 32767 (Int16 -> Int32)
3229        assert_eq!(int32_result_2.value(6), 32767);
3230        assert!(!int32_result_2.is_null(7)); // 2147483647 (fits in Int32)
3231        assert_eq!(int32_result_2.value(7), 2147483647);
3232
3233        // Row 8: Large Int64 should fail conversion -> NULL
3234        assert!(int32_result_2.is_null(8)); // 9223372036854775807 (too large for Int32)
3235    }
3236
3237    #[test]
3238    fn test_struct_extraction_subset_superset_schema_perfectly_shredded() {
3239        // Create variant with diverse null patterns and empty objects
3240        let variant_array = create_comprehensive_shredded_variant();
3241
3242        // Request struct with fields "a", "b", "d" (skip existing "c", add missing "d")
3243        let struct_fields = Fields::from(vec![
3244            Field::new("a", DataType::Int32, true),
3245            Field::new("b", DataType::Int32, true),
3246            Field::new("d", DataType::Int32, true),
3247        ]);
3248        let struct_type = DataType::Struct(struct_fields);
3249
3250        let options = GetOptions {
3251            path: VariantPath::default(),
3252            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
3253            cast_options: CastOptions::default(),
3254        };
3255
3256        let result = variant_get(&variant_array, options).unwrap();
3257
3258        // Verify the result is a StructArray with 3 fields and 5 rows
3259        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
3260        assert_eq!(struct_result.len(), 5);
3261        assert_eq!(struct_result.num_columns(), 3);
3262
3263        let field_a = struct_result
3264            .column(0)
3265            .as_any()
3266            .downcast_ref::<Int32Array>()
3267            .unwrap();
3268        let field_b = struct_result
3269            .column(1)
3270            .as_any()
3271            .downcast_ref::<Int32Array>()
3272            .unwrap();
3273        let field_d = struct_result
3274            .column(2)
3275            .as_any()
3276            .downcast_ref::<Int32Array>()
3277            .unwrap();
3278
3279        // Row 0: Normal values {"a": 1, "b": 2, "c": 3} → {a: 1, b: 2, d: NULL}
3280        assert!(!struct_result.is_null(0));
3281        assert_eq!(field_a.value(0), 1);
3282        assert_eq!(field_b.value(0), 2);
3283        assert!(field_d.is_null(0)); // Missing field "d"
3284
3285        // Row 1: Top-level NULL → struct-level NULL
3286        assert!(struct_result.is_null(1));
3287
3288        // Row 2: Field "a" missing → {a: NULL, b: 2, d: NULL}
3289        assert!(!struct_result.is_null(2));
3290        assert!(field_a.is_null(2)); // Missing field "a"
3291        assert_eq!(field_b.value(2), 2);
3292        assert!(field_d.is_null(2)); // Missing field "d"
3293
3294        // Row 3: Field "b" missing → {a: 1, b: NULL, d: NULL}
3295        assert!(!struct_result.is_null(3));
3296        assert_eq!(field_a.value(3), 1);
3297        assert!(field_b.is_null(3)); // Missing field "b"
3298        assert!(field_d.is_null(3)); // Missing field "d"
3299
3300        // Row 4: Empty object {} → {a: NULL, b: NULL, d: NULL}
3301        assert!(!struct_result.is_null(4));
3302        assert!(field_a.is_null(4)); // Empty object
3303        assert!(field_b.is_null(4)); // Empty object
3304        assert!(field_d.is_null(4)); // Missing field "d"
3305    }
3306
3307    #[test]
3308    fn test_nested_struct_extraction_perfectly_shredded() {
3309        // Create nested variant with diverse null patterns
3310        let variant_array = create_comprehensive_nested_shredded_variant();
3311        println!("variant_array: {variant_array:?}");
3312
3313        // Request 3-level nested struct type {"outer": {"inner": INT}}
3314        let inner_field = Field::new("inner", DataType::Int32, true);
3315        let inner_type = DataType::Struct(Fields::from(vec![inner_field]));
3316        let outer_field = Field::new("outer", inner_type, true);
3317        let result_type = DataType::Struct(Fields::from(vec![outer_field]));
3318
3319        let options = GetOptions {
3320            path: VariantPath::default(),
3321            as_type: Some(Arc::new(Field::new("result", result_type, true))),
3322            cast_options: CastOptions::default(),
3323        };
3324
3325        let result = variant_get(&variant_array, options).unwrap();
3326        println!("result: {result:?}");
3327
3328        // Verify the result is a StructArray with "outer" field and 4 rows
3329        let outer_struct = result.as_any().downcast_ref::<StructArray>().unwrap();
3330        assert_eq!(outer_struct.len(), 4);
3331        assert_eq!(outer_struct.num_columns(), 1);
3332
3333        // Get the "inner" struct column
3334        let inner_struct = outer_struct
3335            .column(0)
3336            .as_any()
3337            .downcast_ref::<StructArray>()
3338            .unwrap();
3339        assert_eq!(inner_struct.num_columns(), 1);
3340
3341        // Get the "leaf" field (Int32 values)
3342        let leaf_field = inner_struct
3343            .column(0)
3344            .as_any()
3345            .downcast_ref::<Int32Array>()
3346            .unwrap();
3347
3348        // Row 0: Normal nested {"outer": {"inner": {"leaf": 42}}}
3349        assert!(!outer_struct.is_null(0));
3350        assert!(!inner_struct.is_null(0));
3351        assert_eq!(leaf_field.value(0), 42);
3352
3353        // Row 1: "inner" field missing → {outer: {inner: NULL}}
3354        assert!(!outer_struct.is_null(1));
3355        assert!(!inner_struct.is_null(1)); // outer exists, inner exists but leaf is NULL
3356        assert!(leaf_field.is_null(1)); // leaf field is NULL
3357
3358        // Row 2: "outer" field missing → {outer: NULL}
3359        assert!(!outer_struct.is_null(2));
3360        assert!(inner_struct.is_null(2)); // outer field is NULL
3361
3362        // Row 3: Top-level NULL → struct-level NULL
3363        assert!(outer_struct.is_null(3));
3364    }
3365
3366    #[test]
3367    fn test_path_based_null_masks_one_step() {
3368        // Create nested variant with diverse null patterns
3369        let variant_array = create_comprehensive_nested_shredded_variant();
3370
3371        // Extract "outer" field using path-based variant_get
3372        let path = VariantPath::try_from("outer").unwrap();
3373        let inner_field = Field::new("inner", DataType::Int32, true);
3374        let result_type = DataType::Struct(Fields::from(vec![inner_field]));
3375
3376        let options = GetOptions {
3377            path,
3378            as_type: Some(Arc::new(Field::new("result", result_type, true))),
3379            cast_options: CastOptions::default(),
3380        };
3381
3382        let result = variant_get(&variant_array, options).unwrap();
3383
3384        // Verify the result is a StructArray with "inner" field and 4 rows
3385        let outer_result = result.as_any().downcast_ref::<StructArray>().unwrap();
3386        assert_eq!(outer_result.len(), 4);
3387        assert_eq!(outer_result.num_columns(), 1);
3388
3389        // Get the "inner" field (Int32 values)
3390        let inner_field = outer_result
3391            .column(0)
3392            .as_any()
3393            .downcast_ref::<Int32Array>()
3394            .unwrap();
3395
3396        // Row 0: Normal nested {"outer": {"inner": 42}} → {"inner": 42}
3397        assert!(!outer_result.is_null(0));
3398        assert_eq!(inner_field.value(0), 42);
3399
3400        // Row 1: Inner field null {"outer": {"inner": null}} → {"inner": null}
3401        assert!(!outer_result.is_null(1));
3402        assert!(inner_field.is_null(1));
3403
3404        // Row 2: Outer field null {"outer": null} → null (entire struct is null)
3405        assert!(outer_result.is_null(2));
3406
3407        // Row 3: Top-level null → null (entire struct is null)
3408        assert!(outer_result.is_null(3));
3409    }
3410
3411    #[test]
3412    fn test_path_based_null_masks_two_steps() {
3413        // Create nested variant with diverse null patterns
3414        let variant_array = create_comprehensive_nested_shredded_variant();
3415
3416        // Extract "outer.inner" field using path-based variant_get
3417        let path = VariantPath::try_from("outer").unwrap().join("inner");
3418
3419        let options = GetOptions {
3420            path,
3421            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
3422            cast_options: CastOptions::default(),
3423        };
3424
3425        let result = variant_get(&variant_array, options).unwrap();
3426
3427        // Verify the result is an Int32Array with 4 rows
3428        let int_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
3429        assert_eq!(int_result.len(), 4);
3430
3431        // Row 0: Normal nested {"outer": {"inner": 42}} → 42
3432        assert!(!int_result.is_null(0));
3433        assert_eq!(int_result.value(0), 42);
3434
3435        // Row 1: Inner field null {"outer": {"inner": null}} → null
3436        assert!(int_result.is_null(1));
3437
3438        // Row 2: Outer field null {"outer": null} → null (path traversal fails)
3439        assert!(int_result.is_null(2));
3440
3441        // Row 3: Top-level null → null (path traversal fails)
3442        assert!(int_result.is_null(3));
3443    }
3444
3445    #[test]
3446    fn test_struct_extraction_mixed_and_unshredded() {
3447        // Create a partially shredded variant (x shredded, y not)
3448        let variant_array = create_mixed_and_unshredded_variant();
3449
3450        // Request struct with both shredded and unshredded fields
3451        let struct_fields = Fields::from(vec![
3452            Field::new("x", DataType::Int32, true),
3453            Field::new("y", DataType::Int32, true),
3454        ]);
3455        let struct_type = DataType::Struct(struct_fields);
3456
3457        let options = GetOptions {
3458            path: VariantPath::default(),
3459            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
3460            cast_options: CastOptions::default(),
3461        };
3462
3463        let result = variant_get(&variant_array, options).unwrap();
3464
3465        // Verify the mixed shredding works (should succeed with current implementation)
3466        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
3467        assert_eq!(struct_result.len(), 4);
3468        assert_eq!(struct_result.num_columns(), 2);
3469
3470        let field_x = struct_result
3471            .column(0)
3472            .as_any()
3473            .downcast_ref::<Int32Array>()
3474            .unwrap();
3475        let field_y = struct_result
3476            .column(1)
3477            .as_any()
3478            .downcast_ref::<Int32Array>()
3479            .unwrap();
3480
3481        // Row 0: {"x": 1, "y": 42} - x from shredded, y from value field
3482        assert_eq!(field_x.value(0), 1);
3483        assert_eq!(field_y.value(0), 42);
3484
3485        // Row 1: {"x": 2} - x from shredded, y missing (perfect shredding)
3486        assert_eq!(field_x.value(1), 2);
3487        assert!(field_y.is_null(1));
3488
3489        // Row 2: {"x": 3, "y": null} - x from shredded, y explicitly null in value
3490        assert_eq!(field_x.value(2), 3);
3491        assert!(field_y.is_null(2));
3492
3493        // Row 3: top-level null - entire struct row should be null
3494        assert!(struct_result.is_null(3));
3495    }
3496
3497    #[test]
3498    fn test_struct_row_builder_handles_unshredded_nested_structs() {
3499        // Create completely unshredded JSON variant (no typed_value at all)
3500        let json_strings = vec![
3501            r#"{"outer": {"inner": 42}}"#,
3502            r#"{"outer": {"inner": 100}}"#,
3503        ];
3504        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
3505        let variant_array = json_to_variant(&string_array).unwrap();
3506
3507        // Request nested struct
3508        let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
3509        let inner_struct_type = DataType::Struct(inner_fields);
3510        let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]);
3511        let outer_struct_type = DataType::Struct(outer_fields);
3512
3513        let options = GetOptions {
3514            path: VariantPath::default(),
3515            as_type: Some(Arc::new(Field::new("result", outer_struct_type, true))),
3516            cast_options: CastOptions::default(),
3517        };
3518
3519        let variant_array_ref = ArrayRef::from(variant_array);
3520        let result = variant_get(&variant_array_ref, options).unwrap();
3521
3522        let outer_struct = result.as_struct();
3523        assert_eq!(outer_struct.len(), 2);
3524        assert_eq!(outer_struct.num_columns(), 1);
3525
3526        let inner_struct = outer_struct.column(0).as_struct();
3527        assert_eq!(inner_struct.num_columns(), 1);
3528
3529        let inner_values = inner_struct
3530            .column(0)
3531            .as_any()
3532            .downcast_ref::<Int32Array>()
3533            .unwrap();
3534        assert_eq!(inner_values.value(0), 42);
3535        assert_eq!(inner_values.value(1), 100);
3536    }
3537
3538    #[test]
3539    fn test_unshredded_struct_safe_cast_non_object_rows_are_null() {
3540        let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123", "{}"];
3541        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
3542        let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap());
3543
3544        let struct_fields = Fields::from(vec![
3545            Field::new("a", DataType::Int32, true),
3546            Field::new("b", DataType::Int32, true),
3547        ]);
3548        let options = GetOptions {
3549            path: VariantPath::default(),
3550            as_type: Some(Arc::new(Field::new(
3551                "result",
3552                DataType::Struct(struct_fields),
3553                true,
3554            ))),
3555            cast_options: CastOptions::default(),
3556        };
3557
3558        let result = variant_get(&variant_array_ref, options).unwrap();
3559        let struct_result = result.as_struct();
3560        let field_a = struct_result
3561            .column(0)
3562            .as_primitive::<arrow::datatypes::Int32Type>();
3563        let field_b = struct_result
3564            .column(1)
3565            .as_primitive::<arrow::datatypes::Int32Type>();
3566
3567        // Row 0 is an object, so the struct row is valid with extracted fields.
3568        assert!(!struct_result.is_null(0));
3569        assert_eq!(field_a.value(0), 1);
3570        assert_eq!(field_b.value(0), 2);
3571
3572        // Row 1 is a scalar, so safe struct cast should produce a NULL struct row.
3573        assert!(struct_result.is_null(1));
3574        assert!(field_a.is_null(1));
3575        assert!(field_b.is_null(1));
3576
3577        // Row 2 is an empty object, so the struct row is valid with missing fields as NULL.
3578        assert!(!struct_result.is_null(2));
3579        assert!(field_a.is_null(2));
3580        assert!(field_b.is_null(2));
3581    }
3582
3583    #[test]
3584    fn test_unshredded_struct_strict_cast_non_object_errors() {
3585        let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123"];
3586        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
3587        let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap());
3588
3589        let struct_fields = Fields::from(vec![
3590            Field::new("a", DataType::Int32, true),
3591            Field::new("b", DataType::Int32, true),
3592        ]);
3593        let options = GetOptions {
3594            path: VariantPath::default(),
3595            as_type: Some(Arc::new(Field::new(
3596                "result",
3597                DataType::Struct(struct_fields),
3598                true,
3599            ))),
3600            cast_options: CastOptions {
3601                safe: false,
3602                ..Default::default()
3603            },
3604        };
3605
3606        let err = variant_get(&variant_array_ref, options).unwrap_err();
3607        assert!(
3608            err.to_string()
3609                .contains("Failed to extract struct from variant")
3610        );
3611    }
3612
3613    /// Create comprehensive shredded variant with diverse null patterns and empty objects
3614    /// Rows: normal values, top-level null, missing field a, missing field b, empty object
3615    fn create_comprehensive_shredded_variant() -> ArrayRef {
3616        let (metadata, _) = {
3617            let mut builder = parquet_variant::VariantBuilder::new();
3618            let obj = builder.new_object();
3619            obj.finish();
3620            builder.finish()
3621        };
3622
3623        // Create null buffer for top-level nulls
3624        let nulls = NullBuffer::from(vec![
3625            true,  // row 0: normal values
3626            false, // row 1: top-level null
3627            true,  // row 2: missing field a
3628            true,  // row 3: missing field b
3629            true,  // row 4: empty object
3630        ]);
3631
3632        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 5));
3633
3634        // Create shredded fields with different null patterns
3635        // Field "a": present in rows 0,3 (missing in rows 1,2,4)
3636        let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
3637        let a_field_shredded = ShreddedVariantFieldArray::from_parts(
3638            None,
3639            Some(Arc::new(a_field_typed_value) as ArrayRef),
3640            None,
3641        );
3642
3643        // Field "b": present in rows 0,2 (missing in rows 1,3,4)
3644        let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
3645        let b_field_shredded = ShreddedVariantFieldArray::from_parts(
3646            None,
3647            Some(Arc::new(b_field_typed_value) as ArrayRef),
3648            None,
3649        );
3650
3651        // Field "c": present in row 0 only (missing in all other rows)
3652        let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
3653        let c_field_shredded = ShreddedVariantFieldArray::from_parts(
3654            None,
3655            Some(Arc::new(c_field_typed_value) as ArrayRef),
3656            None,
3657        );
3658
3659        // Create main typed_value struct
3660        let typed_value_fields = Fields::from(vec![
3661            Field::new("a", a_field_shredded.data_type().clone(), true),
3662            Field::new("b", b_field_shredded.data_type().clone(), true),
3663            Field::new("c", c_field_shredded.data_type().clone(), true),
3664        ]);
3665        let typed_value_struct = StructArray::try_new(
3666            typed_value_fields,
3667            vec![
3668                ArrayRef::from(a_field_shredded),
3669                ArrayRef::from(b_field_shredded),
3670                ArrayRef::from(c_field_shredded),
3671            ],
3672            None,
3673        )
3674        .unwrap();
3675
3676        // Build final VariantArray with top-level nulls
3677        ArrayRef::from(VariantArray::from_parts(
3678            Arc::new(metadata_array),
3679            None,
3680            Some(Arc::new(typed_value_struct)),
3681            Some(nulls),
3682        ))
3683    }
3684
3685    /// Create comprehensive nested shredded variant with diverse null patterns
3686    /// Represents 3-level structure: variant -> outer -> inner (INT value)
3687    /// The shredding schema is: {"metadata": BINARY, "typed_value": {"outer": {"typed_value": {"inner": {"typed_value": INT}}}}}
3688    /// Rows: normal nested value, inner field null, outer field null, top-level null
3689    fn create_comprehensive_nested_shredded_variant() -> ArrayRef {
3690        // Create the inner level: contains typed_value with Int32 values
3691        // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null
3692        let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); // dummy value for row 2
3693        let inner = ShreddedVariantFieldArray::from_parts(
3694            None,
3695            Some(Arc::new(inner_typed_value) as ArrayRef),
3696            None,
3697        );
3698
3699        let outer_typed_value_nulls = NullBuffer::from(vec![
3700            true,  // row 0: inner struct exists with typed_value=42
3701            false, // row 1: inner field NULL
3702            false, // row 2: outer field NULL
3703            false, // row 3: top-level NULL
3704        ]);
3705        let outer_typed_value = StructArrayBuilder::new()
3706            .with_field("inner", ArrayRef::from(inner), false)
3707            .with_nulls(outer_typed_value_nulls)
3708            .build();
3709
3710        let outer = ShreddedVariantFieldArray::from_parts(
3711            None,
3712            Some(Arc::new(outer_typed_value) as ArrayRef),
3713            None,
3714        );
3715
3716        let typed_value_nulls = NullBuffer::from(vec![
3717            true,  // row 0: inner struct exists with typed_value=42
3718            true,  // row 1: inner field NULL
3719            false, // row 2: outer field NULL
3720            false, // row 3: top-level NULL
3721        ]);
3722        let typed_value = StructArrayBuilder::new()
3723            .with_field("outer", ArrayRef::from(outer), false)
3724            .with_nulls(typed_value_nulls)
3725            .build();
3726
3727        // Build final VariantArray with top-level nulls
3728        let metadata_array =
3729            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
3730        let nulls = NullBuffer::from(vec![
3731            true,  // row 0: inner struct exists with typed_value=42
3732            true,  // row 1: inner field NULL
3733            true,  // row 2: outer field NULL
3734            false, // row 3: top-level NULL
3735        ]);
3736        ArrayRef::from(VariantArray::from_parts(
3737            Arc::new(metadata_array),
3738            None,
3739            Some(Arc::new(typed_value)),
3740            Some(nulls),
3741        ))
3742    }
3743
3744    /// Create variant with mixed shredding (spec-compliant) including null scenarios
3745    /// Field "x" is globally shredded, field "y" is never shredded
3746    fn create_mixed_and_unshredded_variant() -> ArrayRef {
3747        // Create spec-compliant mixed shredding:
3748        // - Field "x" is globally shredded (has typed_value column)
3749        // - Field "y" is never shredded (only appears in value field when present)
3750
3751        let (metadata, y_field_value) = {
3752            let mut builder = parquet_variant::VariantBuilder::new();
3753            let mut obj = builder.new_object();
3754            obj.insert("y", Variant::from(42));
3755            obj.finish();
3756            builder.finish()
3757        };
3758
3759        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
3760
3761        // Value field contains objects with unshredded fields only (never contains "x")
3762        // Row 0: {"y": "foo"} - x is shredded out, y remains in value
3763        // Row 1: {} - both x and y are absent (perfect shredding for x, y missing)
3764        // Row 2: {"y": null} - x is shredded out, y explicitly null
3765        // Row 3: top-level null (encoded in VariantArray's null mask, but fields contain valid data)
3766
3767        let empty_object_value = {
3768            let mut builder = parquet_variant::VariantBuilder::new();
3769            builder.new_object().finish();
3770            let (_, value) = builder.finish();
3771            value
3772        };
3773
3774        let y_null_value = {
3775            let mut builder = parquet_variant::VariantBuilder::new();
3776            builder.new_object().with_field("y", Variant::Null).finish();
3777            let (_, value) = builder.finish();
3778            value
3779        };
3780
3781        let value_array = BinaryViewArray::from(vec![
3782            Some(y_field_value.as_slice()),      // Row 0: {"y": 42}
3783            Some(empty_object_value.as_slice()), // Row 1: {}
3784            Some(y_null_value.as_slice()),       // Row 2: {"y": null}
3785            Some(empty_object_value.as_slice()), // Row 3: top-level null (but value field contains valid data)
3786        ]);
3787
3788        // Create shredded field "x" (globally shredded - never appears in value field)
3789        // For top-level null row, the field still needs valid content (not null)
3790        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]);
3791        let x_field_shredded = ShreddedVariantFieldArray::from_parts(
3792            None,
3793            Some(Arc::new(x_field_typed_value) as ArrayRef),
3794            None,
3795        );
3796
3797        // Create main typed_value struct (only contains shredded fields)
3798        let typed_value_struct = StructArrayBuilder::new()
3799            .with_field("x", ArrayRef::from(x_field_shredded), false)
3800            .build();
3801
3802        // Build VariantArray with both value and typed_value (PartiallyShredded)
3803        // Top-level null is encoded in the main StructArray's null mask
3804        let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null
3805        ArrayRef::from(VariantArray::from_parts(
3806            Arc::new(metadata_array),
3807            Some(Arc::new(value_array)),
3808            Some(Arc::new(typed_value_struct)),
3809            Some(variant_nulls),
3810        ))
3811    }
3812
3813    #[test]
3814    fn get_decimal32_rescaled_to_scale2() {
3815        // Build unshredded variant values with different scales
3816        let mut builder = crate::VariantArrayBuilder::new(5);
3817        builder.append_variant(VariantDecimal4::try_new(1234, 2).unwrap().into()); // 12.34
3818        builder.append_variant(VariantDecimal4::try_new(1234, 3).unwrap().into()); // 1.234
3819        builder.append_variant(VariantDecimal4::try_new(1234, 0).unwrap().into()); // 1234
3820        builder.append_null();
3821        builder.append_variant(
3822            VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3)
3823                .unwrap()
3824                .into(),
3825        ); // should fit into Decimal32
3826        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3827
3828        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3829        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3830        let result = variant_get(&variant_array, options).unwrap();
3831        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3832
3833        assert_eq!(result.precision(), 9);
3834        assert_eq!(result.scale(), 2);
3835        assert_eq!(result.value(0), 1234);
3836        assert_eq!(result.value(1), 123);
3837        assert_eq!(result.value(2), 123400);
3838        assert!(result.is_null(3));
3839        assert_eq!(
3840            result.value(4),
3841            VariantDecimal4::MAX_UNSCALED_VALUE / 10 + 1
3842        ); // should not be null as the final result fits into Decimal32
3843    }
3844
3845    #[test]
3846    fn get_decimal32_scale_down_rounding() {
3847        let mut builder = crate::VariantArrayBuilder::new(7);
3848        builder.append_variant(VariantDecimal4::try_new(1235, 0).unwrap().into());
3849        builder.append_variant(VariantDecimal4::try_new(1245, 0).unwrap().into());
3850        builder.append_variant(VariantDecimal4::try_new(-1235, 0).unwrap().into());
3851        builder.append_variant(VariantDecimal4::try_new(-1245, 0).unwrap().into());
3852        builder.append_variant(VariantDecimal4::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
3853        builder.append_variant(VariantDecimal4::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
3854        builder.append_variant(VariantDecimal4::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
3855        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3856
3857        let field = Field::new("result", DataType::Decimal32(9, -1), true);
3858        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3859        let result = variant_get(&variant_array, options).unwrap();
3860        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3861
3862        assert_eq!(result.precision(), 9);
3863        assert_eq!(result.scale(), -1);
3864        assert_eq!(result.value(0), 124);
3865        assert_eq!(result.value(1), 125);
3866        assert_eq!(result.value(2), -124);
3867        assert_eq!(result.value(3), -125);
3868        assert_eq!(result.value(4), 1);
3869        assert!(result.is_valid(5));
3870        assert_eq!(result.value(5), 0);
3871        assert_eq!(result.value(6), 1);
3872    }
3873
3874    #[test]
3875    fn get_decimal32_large_scale_reduction() {
3876        let mut builder = crate::VariantArrayBuilder::new(2);
3877        builder.append_variant(
3878            VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3879                .unwrap()
3880                .into(),
3881        );
3882        builder.append_variant(
3883            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3884                .unwrap()
3885                .into(),
3886        );
3887        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3888
3889        let field = Field::new("result", DataType::Decimal32(9, -9), true);
3890        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3891        let result = variant_get(&variant_array, options).unwrap();
3892        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3893
3894        assert_eq!(result.precision(), 9);
3895        assert_eq!(result.scale(), -9);
3896        assert_eq!(result.value(0), -1);
3897        assert_eq!(result.value(1), 1);
3898
3899        let field = Field::new("result", DataType::Decimal32(9, -10), true);
3900        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3901        let result = variant_get(&variant_array, options).unwrap();
3902        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3903
3904        assert_eq!(result.precision(), 9);
3905        assert_eq!(result.scale(), -10);
3906        assert!(result.is_valid(0));
3907        assert_eq!(result.value(0), 0);
3908        assert!(result.is_valid(1));
3909        assert_eq!(result.value(1), 0);
3910    }
3911
3912    #[test]
3913    fn get_decimal32_precision_overflow_safe() {
3914        // Exceed Decimal32 after scaling and rounding
3915        let mut builder = crate::VariantArrayBuilder::new(2);
3916        builder.append_variant(
3917            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3918                .unwrap()
3919                .into(),
3920        );
3921        builder.append_variant(
3922            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9)
3923                .unwrap()
3924                .into(),
3925        ); // integer value round up overflows
3926        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3927
3928        let field = Field::new("result", DataType::Decimal32(2, 2), true);
3929        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3930        let result = variant_get(&variant_array, options).unwrap();
3931        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
3932
3933        assert!(result.is_null(0));
3934        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
3935    }
3936
3937    #[test]
3938    fn get_decimal32_precision_overflow_unsafe_errors() {
3939        let mut builder = crate::VariantArrayBuilder::new(1);
3940        builder.append_variant(
3941            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
3942                .unwrap()
3943                .into(),
3944        );
3945        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3946
3947        let field = Field::new("result", DataType::Decimal32(9, 2), true);
3948        let cast_options = CastOptions {
3949            safe: false,
3950            ..Default::default()
3951        };
3952        let options = GetOptions::new()
3953            .with_as_type(Some(FieldRef::from(field)))
3954            .with_cast_options(cast_options);
3955        let err = variant_get(&variant_array, options).unwrap_err();
3956
3957        assert!(
3958            err.to_string().contains(
3959                "Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4"
3960            )
3961        );
3962    }
3963
3964    #[test]
3965    fn get_decimal64_rescaled_to_scale2() {
3966        let mut builder = crate::VariantArrayBuilder::new(5);
3967        builder.append_variant(VariantDecimal8::try_new(1234, 2).unwrap().into()); // 12.34
3968        builder.append_variant(VariantDecimal8::try_new(1234, 3).unwrap().into()); // 1.234
3969        builder.append_variant(VariantDecimal8::try_new(1234, 0).unwrap().into()); // 1234
3970        builder.append_null();
3971        builder.append_variant(
3972            VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3)
3973                .unwrap()
3974                .into(),
3975        ); // should fit into Decimal64
3976        let variant_array: ArrayRef = ArrayRef::from(builder.build());
3977
3978        let field = Field::new("result", DataType::Decimal64(18, 2), true);
3979        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
3980        let result = variant_get(&variant_array, options).unwrap();
3981        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
3982
3983        assert_eq!(result.precision(), 18);
3984        assert_eq!(result.scale(), 2);
3985        assert_eq!(result.value(0), 1234);
3986        assert_eq!(result.value(1), 123);
3987        assert_eq!(result.value(2), 123400);
3988        assert!(result.is_null(3));
3989        assert_eq!(
3990            result.value(4),
3991            VariantDecimal8::MAX_UNSCALED_VALUE / 10 + 1
3992        ); // should not be null as the final result fits into Decimal64
3993    }
3994
3995    #[test]
3996    fn get_decimal64_scale_down_rounding() {
3997        let mut builder = crate::VariantArrayBuilder::new(7);
3998        builder.append_variant(VariantDecimal8::try_new(1235, 0).unwrap().into());
3999        builder.append_variant(VariantDecimal8::try_new(1245, 0).unwrap().into());
4000        builder.append_variant(VariantDecimal8::try_new(-1235, 0).unwrap().into());
4001        builder.append_variant(VariantDecimal8::try_new(-1245, 0).unwrap().into());
4002        builder.append_variant(VariantDecimal8::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
4003        builder.append_variant(VariantDecimal8::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
4004        builder.append_variant(VariantDecimal8::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
4005        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4006
4007        let field = Field::new("result", DataType::Decimal64(18, -1), true);
4008        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4009        let result = variant_get(&variant_array, options).unwrap();
4010        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
4011
4012        assert_eq!(result.precision(), 18);
4013        assert_eq!(result.scale(), -1);
4014        assert_eq!(result.value(0), 124);
4015        assert_eq!(result.value(1), 125);
4016        assert_eq!(result.value(2), -124);
4017        assert_eq!(result.value(3), -125);
4018        assert_eq!(result.value(4), 1);
4019        assert!(result.is_valid(5));
4020        assert_eq!(result.value(5), 0);
4021        assert_eq!(result.value(6), 1);
4022    }
4023
4024    #[test]
4025    fn get_decimal64_large_scale_reduction() {
4026        let mut builder = crate::VariantArrayBuilder::new(2);
4027        builder.append_variant(
4028            VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0)
4029                .unwrap()
4030                .into(),
4031        );
4032        builder.append_variant(
4033            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
4034                .unwrap()
4035                .into(),
4036        );
4037        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4038
4039        let field = Field::new("result", DataType::Decimal64(18, -18), true);
4040        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4041        let result = variant_get(&variant_array, options).unwrap();
4042        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
4043
4044        assert_eq!(result.precision(), 18);
4045        assert_eq!(result.scale(), -18);
4046        assert_eq!(result.value(0), -1);
4047        assert_eq!(result.value(1), 1);
4048
4049        let field = Field::new("result", DataType::Decimal64(18, -19), true);
4050        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4051        let result = variant_get(&variant_array, options).unwrap();
4052        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
4053
4054        assert_eq!(result.precision(), 18);
4055        assert_eq!(result.scale(), -19);
4056        assert!(result.is_valid(0));
4057        assert_eq!(result.value(0), 0);
4058        assert!(result.is_valid(1));
4059        assert_eq!(result.value(1), 0);
4060    }
4061
4062    #[test]
4063    fn get_decimal64_precision_overflow_safe() {
4064        // Exceed Decimal64 after scaling and rounding
4065        let mut builder = crate::VariantArrayBuilder::new(2);
4066        builder.append_variant(
4067            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
4068                .unwrap()
4069                .into(),
4070        );
4071        builder.append_variant(
4072            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18)
4073                .unwrap()
4074                .into(),
4075        ); // integer value round up overflows
4076        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4077
4078        let field = Field::new("result", DataType::Decimal64(2, 2), true);
4079        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4080        let result = variant_get(&variant_array, options).unwrap();
4081        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
4082
4083        assert!(result.is_null(0));
4084        assert!(result.is_null(1));
4085    }
4086
4087    #[test]
4088    fn get_decimal64_precision_overflow_unsafe_errors() {
4089        let mut builder = crate::VariantArrayBuilder::new(1);
4090        builder.append_variant(
4091            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
4092                .unwrap()
4093                .into(),
4094        );
4095        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4096
4097        let field = Field::new("result", DataType::Decimal64(18, 2), true);
4098        let cast_options = CastOptions {
4099            safe: false,
4100            ..Default::default()
4101        };
4102        let options = GetOptions::new()
4103            .with_as_type(Some(FieldRef::from(field)))
4104            .with_cast_options(cast_options);
4105        let err = variant_get(&variant_array, options).unwrap_err();
4106
4107        assert!(
4108            err.to_string().contains(
4109                "Failed to cast to Decimal64(precision=18, scale=2) from variant Decimal8"
4110            )
4111        );
4112    }
4113
4114    #[test]
4115    fn get_decimal128_rescaled_to_scale2() {
4116        let mut builder = crate::VariantArrayBuilder::new(4);
4117        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into());
4118        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into());
4119        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into());
4120        builder.append_null();
4121        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4122
4123        let field = Field::new("result", DataType::Decimal128(38, 2), true);
4124        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4125        let result = variant_get(&variant_array, options).unwrap();
4126        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
4127
4128        assert_eq!(result.precision(), 38);
4129        assert_eq!(result.scale(), 2);
4130        assert_eq!(result.value(0), 1234);
4131        assert_eq!(result.value(1), 123);
4132        assert_eq!(result.value(2), 123400);
4133        assert!(result.is_null(3));
4134    }
4135
4136    #[test]
4137    fn get_decimal128_scale_down_rounding() {
4138        let mut builder = crate::VariantArrayBuilder::new(7);
4139        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
4140        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
4141        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
4142        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
4143        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
4144        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
4145        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
4146        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4147
4148        let field = Field::new("result", DataType::Decimal128(38, -1), true);
4149        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4150        let result = variant_get(&variant_array, options).unwrap();
4151        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
4152
4153        assert_eq!(result.precision(), 38);
4154        assert_eq!(result.scale(), -1);
4155        assert_eq!(result.value(0), 124);
4156        assert_eq!(result.value(1), 125);
4157        assert_eq!(result.value(2), -124);
4158        assert_eq!(result.value(3), -125);
4159        assert_eq!(result.value(4), 1);
4160        assert!(result.is_valid(5));
4161        assert_eq!(result.value(5), 0);
4162        assert_eq!(result.value(6), 1);
4163    }
4164
4165    #[test]
4166    fn get_decimal128_precision_overflow_safe() {
4167        // Exceed Decimal128 after scaling and rounding
4168        let mut builder = crate::VariantArrayBuilder::new(2);
4169        builder.append_variant(
4170            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
4171                .unwrap()
4172                .into(),
4173        );
4174        builder.append_variant(
4175            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38)
4176                .unwrap()
4177                .into(),
4178        ); // integer value round up overflows
4179        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4180
4181        let field = Field::new("result", DataType::Decimal128(2, 2), true);
4182        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4183        let result = variant_get(&variant_array, options).unwrap();
4184        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
4185
4186        assert!(result.is_null(0));
4187        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
4188    }
4189
4190    #[test]
4191    fn get_decimal128_precision_overflow_unsafe_errors() {
4192        let mut builder = crate::VariantArrayBuilder::new(1);
4193        builder.append_variant(
4194            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
4195                .unwrap()
4196                .into(),
4197        );
4198        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4199
4200        let field = Field::new("result", DataType::Decimal128(38, 2), true);
4201        let cast_options = CastOptions {
4202            safe: false,
4203            ..Default::default()
4204        };
4205        let options = GetOptions::new()
4206            .with_as_type(Some(FieldRef::from(field)))
4207            .with_cast_options(cast_options);
4208        let err = variant_get(&variant_array, options).unwrap_err();
4209
4210        assert!(err.to_string().contains(
4211            "Failed to cast to Decimal128(precision=38, scale=2) from variant Decimal16"
4212        ));
4213    }
4214
4215    #[test]
4216    fn get_decimal256_rescaled_to_scale2() {
4217        // Build unshredded variant values with different scales using Decimal16 source
4218        let mut builder = crate::VariantArrayBuilder::new(4);
4219        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34
4220        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234
4221        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234
4222        builder.append_null();
4223        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4224
4225        let field = Field::new("result", DataType::Decimal256(76, 2), true);
4226        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4227        let result = variant_get(&variant_array, options).unwrap();
4228        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
4229
4230        assert_eq!(result.precision(), 76);
4231        assert_eq!(result.scale(), 2);
4232        assert_eq!(result.value(0), i256::from_i128(1234));
4233        assert_eq!(result.value(1), i256::from_i128(123));
4234        assert_eq!(result.value(2), i256::from_i128(123400));
4235        assert!(result.is_null(3));
4236    }
4237
4238    #[test]
4239    fn get_decimal256_scale_down_rounding() {
4240        let mut builder = crate::VariantArrayBuilder::new(7);
4241        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into());
4242        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into());
4243        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into());
4244        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into());
4245        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
4246        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
4247        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
4248        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4249
4250        let field = Field::new("result", DataType::Decimal256(76, -1), true);
4251        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4252        let result = variant_get(&variant_array, options).unwrap();
4253        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
4254
4255        assert_eq!(result.precision(), 76);
4256        assert_eq!(result.scale(), -1);
4257        assert_eq!(result.value(0), i256::from_i128(124));
4258        assert_eq!(result.value(1), i256::from_i128(125));
4259        assert_eq!(result.value(2), i256::from_i128(-124));
4260        assert_eq!(result.value(3), i256::from_i128(-125));
4261        assert_eq!(result.value(4), i256::from_i128(1));
4262        assert!(result.is_valid(5));
4263        assert_eq!(result.value(5), i256::from_i128(0));
4264        assert_eq!(result.value(6), i256::from_i128(1));
4265    }
4266
4267    #[test]
4268    fn get_decimal256_precision_overflow_safe() {
4269        // Exceed Decimal128 max precision (38) after scaling
4270        let mut builder = crate::VariantArrayBuilder::new(2);
4271        builder.append_variant(
4272            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
4273                .unwrap()
4274                .into(),
4275        );
4276        builder.append_variant(
4277            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
4278                .unwrap()
4279                .into(),
4280        );
4281        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4282
4283        let field = Field::new("result", DataType::Decimal256(76, 39), true);
4284        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4285        let result = variant_get(&variant_array, options).unwrap();
4286        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
4287
4288        // Input is Decimal16 with integer = 10^38-1 and scale = 1, target scale = 39
4289        // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38
4290        let base = i256::from_i128(10);
4291        let factor = base.checked_pow(38).unwrap();
4292        let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE)
4293            .checked_mul(factor)
4294            .unwrap();
4295        assert_eq!(result.value(0), expected);
4296        assert!(result.is_null(1));
4297    }
4298
4299    #[test]
4300    fn get_decimal256_precision_overflow_unsafe_errors() {
4301        // Exceed Decimal128 max precision (38) after scaling
4302        let mut builder = crate::VariantArrayBuilder::new(2);
4303        builder.append_variant(
4304            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
4305                .unwrap()
4306                .into(),
4307        );
4308        builder.append_variant(
4309            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
4310                .unwrap()
4311                .into(),
4312        );
4313        let variant_array: ArrayRef = ArrayRef::from(builder.build());
4314
4315        let field = Field::new("result", DataType::Decimal256(76, 39), true);
4316        let cast_options = CastOptions {
4317            safe: false,
4318            ..Default::default()
4319        };
4320        let options = GetOptions::new()
4321            .with_as_type(Some(FieldRef::from(field)))
4322            .with_cast_options(cast_options);
4323        let err = variant_get(&variant_array, options).unwrap_err();
4324
4325        assert!(err.to_string().contains(
4326            "Failed to cast to Decimal256(precision=76, scale=39) from variant Decimal16"
4327        ));
4328    }
4329
4330    #[test]
4331    fn get_non_supported_temporal_types_error() {
4332        let values = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse)];
4333        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(values));
4334
4335        let test_cases = vec![
4336            FieldRef::from(Field::new(
4337                "result",
4338                DataType::Duration(TimeUnit::Microsecond),
4339                true,
4340            )),
4341            FieldRef::from(Field::new(
4342                "result",
4343                DataType::Interval(IntervalUnit::YearMonth),
4344                true,
4345            )),
4346        ];
4347
4348        for field in test_cases {
4349            let options = GetOptions::new().with_as_type(Some(field));
4350            let err = variant_get(&variant_array, options).unwrap_err();
4351            assert!(
4352                err.to_string()
4353                    .contains("Casting Variant to duration/interval types is not supported")
4354            );
4355        }
4356    }
4357
4358    #[test]
4359    fn get_variant_as_dictionary() {
4360        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(vec![
4361            Some(Variant::from("apple")),
4362            Some(Variant::from("banana")),
4363            None,
4364            Some(Variant::from("apple")),
4365        ]));
4366        let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
4367        let options = GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4368            "dict",
4369            data_type.clone(),
4370            true,
4371        ))));
4372
4373        let result = variant_get(&variant_array, options).unwrap();
4374        assert_eq!(result.data_type(), &data_type);
4375
4376        let decoded = cast(result.as_ref(), &DataType::Utf8).unwrap();
4377        let expected = StringArray::from(vec![Some("apple"), Some("banana"), None, Some("apple")]);
4378        assert_eq!(decoded.as_ref(), &expected);
4379    }
4380
4381    #[test]
4382    fn get_variant_as_numeric_dictionary() {
4383        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(vec![
4384            Some(Variant::from(42)),
4385            Some(Variant::from(7)),
4386            None,
4387            Some(Variant::from(42)),
4388        ]));
4389        let data_type = DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int32));
4390        let options = GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4391            "dict",
4392            data_type.clone(),
4393            true,
4394        ))));
4395
4396        let result = variant_get(&variant_array, options).unwrap();
4397        assert_eq!(result.data_type(), &data_type);
4398
4399        let decoded = cast(result.as_ref(), &DataType::Int32).unwrap();
4400        let expected = Int32Array::from(vec![Some(42), Some(7), None, Some(42)]);
4401        assert_eq!(decoded.as_ref(), &expected);
4402    }
4403
4404    #[test]
4405    fn get_variant_as_run_end_encoded() {
4406        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(vec![
4407            Some(Variant::from("apple")),
4408            Some(Variant::from("apple")),
4409            None,
4410            Some(Variant::from("banana")),
4411            Some(Variant::from("banana")),
4412        ]));
4413        let run_ends = Arc::new(Field::new("run_ends", DataType::Int32, false));
4414        let values = Arc::new(Field::new("values", DataType::Utf8, true));
4415        let data_type = DataType::RunEndEncoded(run_ends, values);
4416        let options = GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4417            "ree",
4418            data_type.clone(),
4419            true,
4420        ))));
4421
4422        let result = variant_get(&variant_array, options).unwrap();
4423        assert_eq!(result.data_type(), &data_type);
4424
4425        let decoded = cast(result.as_ref(), &DataType::Utf8).unwrap();
4426        let expected = StringArray::from(vec![
4427            Some("apple"),
4428            Some("apple"),
4429            None,
4430            Some("banana"),
4431            Some("banana"),
4432        ]);
4433        assert_eq!(decoded.as_ref(), &expected);
4434    }
4435
4436    fn invalid_time_variant_array() -> ArrayRef {
4437        let mut builder = VariantArrayBuilder::new(3);
4438        // 86401000000 is invalid for Time64Microsecond (max is 86400000000)
4439        builder.append_variant(Variant::Int64(86401000000));
4440        builder.append_variant(Variant::Int64(86401000000));
4441        builder.append_variant(Variant::Int64(86401000000));
4442        Arc::new(builder.build().into_inner())
4443    }
4444
4445    #[test]
4446    fn test_variant_get_error_when_cast_failure_and_safe_false() {
4447        let variant_array = invalid_time_variant_array();
4448
4449        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
4450        let cast_options = CastOptions {
4451            safe: false, // Will error on cast failure
4452            ..Default::default()
4453        };
4454        let options = GetOptions::new()
4455            .with_as_type(Some(FieldRef::from(field)))
4456            .with_cast_options(cast_options);
4457        let err = variant_get(&variant_array, options).unwrap_err();
4458        assert!(
4459            err.to_string().contains(
4460                "Cast error: Failed to extract primitive of type Time64(µs) from variant Int64(86401000000) at path VariantPath([])"
4461            ),
4462            "actual: {err}",
4463        );
4464    }
4465
4466    #[test]
4467    fn test_variant_get_return_null_when_cast_failure_and_safe_true() {
4468        let variant_array = invalid_time_variant_array();
4469
4470        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
4471        let cast_options = CastOptions {
4472            safe: true, // Will return null on cast failure
4473            ..Default::default()
4474        };
4475        let options = GetOptions::new()
4476            .with_as_type(Some(FieldRef::from(field)))
4477            .with_cast_options(cast_options);
4478        let result = variant_get(&variant_array, options).unwrap();
4479        assert_eq!(3, result.len());
4480
4481        for i in 0..3 {
4482            assert!(result.is_null(i));
4483        }
4484    }
4485
4486    #[test]
4487    fn test_perfect_shredding_returns_same_arc_ptr() {
4488        let variant_array = perfectly_shredded_int32_variant_array();
4489
4490        let variant_array_ref = VariantArray::try_new(&variant_array).unwrap();
4491        let typed_value_arc = variant_array_ref.typed_value_column().unwrap().clone();
4492
4493        let field = Field::new("result", DataType::Int32, true);
4494        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
4495        let result = variant_get(&variant_array, options).unwrap();
4496
4497        assert!(Arc::ptr_eq(&typed_value_arc, &result));
4498    }
4499
4500    #[test]
4501    fn test_perfect_shredding_three_typed_value_columns() {
4502        // Column 1: perfectly shredded primitive with all nulls
4503        let all_nulls_values: Arc<Int32Array> = Arc::new(Int32Array::from(vec![
4504            Option::<i32>::None,
4505            Option::<i32>::None,
4506            Option::<i32>::None,
4507        ]));
4508        let all_nulls_erased: ArrayRef = all_nulls_values.clone();
4509        let all_nulls_field =
4510            ShreddedVariantFieldArray::from_parts(None, Some(all_nulls_erased.clone()), None);
4511        let all_nulls_type = all_nulls_field.data_type().clone();
4512        let all_nulls_struct: ArrayRef = ArrayRef::from(all_nulls_field);
4513
4514        // Column 2: perfectly shredded primitive with some nulls
4515        let some_nulls_values: Arc<Int32Array> =
4516            Arc::new(Int32Array::from(vec![Some(10), None, Some(30)]));
4517        let some_nulls_erased: ArrayRef = some_nulls_values.clone();
4518        let some_nulls_field =
4519            ShreddedVariantFieldArray::from_parts(None, Some(some_nulls_erased.clone()), None);
4520        let some_nulls_type = some_nulls_field.data_type().clone();
4521        let some_nulls_struct: ArrayRef = ArrayRef::from(some_nulls_field);
4522
4523        // Column 3: perfectly shredded nested struct
4524        let inner_values: Arc<Int32Array> =
4525            Arc::new(Int32Array::from(vec![Some(111), None, Some(333)]));
4526        let inner_erased: ArrayRef = inner_values.clone();
4527        let inner_field =
4528            ShreddedVariantFieldArray::from_parts(None, Some(inner_erased.clone()), None);
4529        let inner_field_type = inner_field.data_type().clone();
4530        let inner_struct_array: ArrayRef = ArrayRef::from(inner_field);
4531
4532        let nested_struct = Arc::new(
4533            StructArray::try_new(
4534                Fields::from(vec![Field::new("inner", inner_field_type, true)]),
4535                vec![inner_struct_array],
4536                None,
4537            )
4538            .unwrap(),
4539        );
4540        let nested_struct_erased: ArrayRef = nested_struct.clone();
4541        let struct_field =
4542            ShreddedVariantFieldArray::from_parts(None, Some(nested_struct_erased.clone()), None);
4543        let struct_field_type = struct_field.data_type().clone();
4544        let struct_field_struct: ArrayRef = ArrayRef::from(struct_field);
4545
4546        // Assemble the top-level typed_value struct with the three columns above
4547        let typed_value_struct = StructArray::try_new(
4548            Fields::from(vec![
4549                Field::new("all_nulls", all_nulls_type, true),
4550                Field::new("some_nulls", some_nulls_type, true),
4551                Field::new("struct_field", struct_field_type, true),
4552            ]),
4553            vec![all_nulls_struct, some_nulls_struct, struct_field_struct],
4554            None,
4555        )
4556        .unwrap();
4557
4558        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
4559            EMPTY_VARIANT_METADATA_BYTES,
4560            all_nulls_values.len(),
4561        ));
4562        let variant_array: ArrayRef = VariantArray::from_parts(
4563            Arc::new(metadata),
4564            None,
4565            Some(Arc::new(typed_value_struct)),
4566            None,
4567        )
4568        .into();
4569
4570        // Case 1: all-null primitive column should reuse the typed_value Arc directly
4571        let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
4572        let all_nulls_result = variant_get(
4573            &variant_array,
4574            GetOptions::new_with_path(VariantPath::try_from("all_nulls").unwrap())
4575                .with_as_type(Some(all_nulls_field_ref)),
4576        )
4577        .unwrap();
4578        assert!(Arc::ptr_eq(&all_nulls_result, &all_nulls_erased));
4579
4580        // Case 2: primitive column with some nulls should also reuse its typed_value Arc
4581        let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
4582        let some_nulls_result = variant_get(
4583            &variant_array,
4584            GetOptions::new_with_path(VariantPath::try_from("some_nulls").unwrap())
4585                .with_as_type(Some(some_nulls_field_ref)),
4586        )
4587        .unwrap();
4588        assert!(Arc::ptr_eq(&some_nulls_result, &some_nulls_erased));
4589
4590        // Case 3: struct column should return a StructArray composed from the nested field
4591        let struct_child_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
4592        let struct_field_ref = FieldRef::from(Field::new(
4593            "result",
4594            DataType::Struct(struct_child_fields.clone()),
4595            true,
4596        ));
4597        let struct_result = variant_get(
4598            &variant_array,
4599            GetOptions::new_with_path(VariantPath::try_from("struct_field").unwrap())
4600                .with_as_type(Some(struct_field_ref)),
4601        )
4602        .unwrap();
4603        let struct_array = struct_result
4604            .as_any()
4605            .downcast_ref::<StructArray>()
4606            .unwrap();
4607        assert_eq!(struct_array.len(), 3);
4608        assert_eq!(struct_array.null_count(), 0);
4609
4610        let inner_values_result = struct_array
4611            .column(0)
4612            .as_any()
4613            .downcast_ref::<Int32Array>()
4614            .unwrap();
4615        assert_eq!(inner_values_result.len(), 3);
4616        assert_eq!(inner_values_result.value(0), 111);
4617        assert!(inner_values_result.is_null(1));
4618        assert_eq!(inner_values_result.value(2), 333);
4619    }
4620
4621    #[test]
4622    fn test_variant_get_list_like_safe_cast() {
4623        let string_array: ArrayRef = Arc::new(StringArray::from(vec![
4624            r#"{"outer":{"list":[1, "two", 3]}}"#,
4625            r#"{"outer":{"list":"not a list"}}"#,
4626        ]));
4627        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4628
4629        let element_array: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
4630        let field = Arc::new(Field::new("item", Int64, true));
4631
4632        let expectations = vec![
4633            (
4634                DataType::List(field.clone()),
4635                Arc::new(ListArray::new(
4636                    field.clone(),
4637                    OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
4638                    element_array.clone(),
4639                    Some(NullBuffer::from(vec![true, false])),
4640                )) as ArrayRef,
4641            ),
4642            (
4643                DataType::LargeList(field.clone()),
4644                Arc::new(LargeListArray::new(
4645                    field.clone(),
4646                    OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
4647                    element_array.clone(),
4648                    Some(NullBuffer::from(vec![true, false])),
4649                )) as ArrayRef,
4650            ),
4651            (
4652                DataType::ListView(field.clone()),
4653                Arc::new(ListViewArray::new(
4654                    field.clone(),
4655                    ScalarBuffer::from(vec![0, 3]),
4656                    ScalarBuffer::from(vec![3, 0]),
4657                    element_array.clone(),
4658                    Some(NullBuffer::from(vec![true, false])),
4659                )) as ArrayRef,
4660            ),
4661            (
4662                DataType::LargeListView(field.clone()),
4663                Arc::new(LargeListViewArray::new(
4664                    field.clone(),
4665                    ScalarBuffer::from(vec![0, 3]),
4666                    ScalarBuffer::from(vec![3, 0]),
4667                    element_array,
4668                    Some(NullBuffer::from(vec![true, false])),
4669                )) as ArrayRef,
4670            ),
4671            (
4672                DataType::FixedSizeList(field.clone(), 3),
4673                Arc::new(FixedSizeListArray::new(
4674                    field,
4675                    3,
4676                    Arc::new(Int64Array::from(vec![
4677                        Some(1),
4678                        None,
4679                        Some(3),
4680                        None,
4681                        None,
4682                        None,
4683                    ])),
4684                    Some(NullBuffer::from(vec![true, false])),
4685                )) as ArrayRef,
4686            ),
4687        ];
4688
4689        for (request_type, expected) in expectations {
4690            let options =
4691                GetOptions::new_with_path(VariantPath::try_from("outer").unwrap().join("list"))
4692                    .with_as_type(Some(FieldRef::from(Field::new(
4693                        "result",
4694                        request_type.clone(),
4695                        true,
4696                    ))));
4697
4698            let result = variant_get(&variant_array, options).unwrap();
4699            assert_eq!(result.data_type(), expected.data_type());
4700            assert_eq!(&result, &expected);
4701        }
4702
4703        for (idx, expected) in [
4704            (0, vec![Some(1), None]),
4705            (1, vec![None, None]),
4706            (2, vec![Some(3), None]),
4707        ] {
4708            let index_options = GetOptions::new_with_path(
4709                VariantPath::try_from("outer")
4710                    .unwrap()
4711                    .join("list")
4712                    .join(idx),
4713            )
4714            .with_as_type(Some(FieldRef::from(Field::new(
4715                "result",
4716                DataType::Int64,
4717                true,
4718            ))));
4719            let index_result = variant_get(&variant_array, index_options).unwrap();
4720            let index_expected: ArrayRef = Arc::new(Int64Array::from(expected));
4721            assert_eq!(&index_result, &index_expected);
4722        }
4723    }
4724
4725    #[test]
4726    fn test_variant_get_nested_list() {
4727        use arrow::datatypes::Int64Type;
4728
4729        let string_array: ArrayRef = Arc::new(StringArray::from(vec![
4730            r#"[[1, 2], [3]]"#,
4731            r#"[[4], "not a list", [5, 6]]"#,
4732        ]));
4733        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4734
4735        let inner_field = Arc::new(Field::new("item", Int64, true));
4736        let outer_field = Arc::new(Field::new(
4737            "item",
4738            DataType::List(inner_field.clone()),
4739            true,
4740        ));
4741        let request_type = DataType::List(outer_field.clone());
4742
4743        let options = GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4744            "result",
4745            request_type,
4746            true,
4747        ))));
4748        let result = variant_get(&variant_array, options).unwrap();
4749        let outer = result.as_list::<i32>();
4750
4751        // Row 0: [[1, 2], [3]]
4752        let row0 = outer.value(0);
4753        let row0 = row0.as_list::<i32>();
4754        assert_eq!(row0.len(), 2);
4755        let elem0 = row0.value(0);
4756        assert_eq!(elem0.as_primitive::<Int64Type>().values(), &[1, 2]);
4757        let elem1 = row0.value(1);
4758        assert_eq!(elem1.as_primitive::<Int64Type>().values(), &[3]);
4759
4760        // Row 1: [[4], null, [5, 6]] — "not a list" becomes null inner list
4761        let row1 = outer.value(1);
4762        let row1 = row1.as_list::<i32>();
4763        assert_eq!(row1.len(), 3);
4764        let elem0 = row1.value(0);
4765        assert_eq!(elem0.as_primitive::<Int64Type>().values(), &[4]);
4766        assert!(row1.is_null(1));
4767        let elem2 = row1.value(2);
4768        assert_eq!(elem2.as_primitive::<Int64Type>().values(), &[5, 6]);
4769    }
4770
4771    #[test]
4772    fn test_variant_get_list_like_unsafe_cast_errors_on_element_mismatch() {
4773        let string_array: ArrayRef =
4774            Arc::new(StringArray::from(vec![r#"[1, "two", 3]"#, "[4, 5]"]));
4775        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4776        let cast_options = CastOptions {
4777            safe: false,
4778            ..Default::default()
4779        };
4780
4781        let item_field = Arc::new(Field::new("item", DataType::Int64, true));
4782        let request_types = vec![
4783            DataType::List(item_field.clone()),
4784            DataType::LargeList(item_field.clone()),
4785            DataType::ListView(item_field.clone()),
4786            DataType::LargeListView(item_field),
4787        ];
4788
4789        for request_type in request_types {
4790            let options = GetOptions::new()
4791                .with_as_type(Some(FieldRef::from(Field::new(
4792                    "result",
4793                    request_type.clone(),
4794                    true,
4795                ))))
4796                .with_cast_options(cast_options.clone());
4797
4798            let err = variant_get(&variant_array, options).unwrap_err();
4799            assert!(
4800                err.to_string()
4801                    .contains("Failed to extract primitive of type Int64")
4802            );
4803        }
4804    }
4805
4806    #[test]
4807    fn test_variant_get_list_like_unsafe_cast_preserves_null_elements() {
4808        let string_array: ArrayRef = Arc::new(StringArray::from(vec![r#"[1, null, 3]"#]));
4809        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4810        let cast_options = CastOptions {
4811            safe: false,
4812            ..Default::default()
4813        };
4814        let options = GetOptions::new()
4815            .with_as_type(Some(FieldRef::from(Field::new(
4816                "result",
4817                DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
4818                true,
4819            ))))
4820            .with_cast_options(cast_options);
4821
4822        let result = variant_get(&variant_array, options).unwrap();
4823        let list_array = result.as_any().downcast_ref::<ListArray>().unwrap();
4824        let values = list_array
4825            .values()
4826            .as_any()
4827            .downcast_ref::<Int64Array>()
4828            .unwrap();
4829
4830        assert_eq!(values.len(), 3);
4831        assert_eq!(values.value(0), 1);
4832        assert!(values.is_null(1));
4833        assert_eq!(values.value(2), 3);
4834    }
4835
4836    #[test]
4837    fn test_variant_get_list_like_unsafe_cast_errors_on_non_list() {
4838        let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2]", "\"not a list\""]));
4839        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4840        let cast_options = CastOptions {
4841            safe: false,
4842            ..Default::default()
4843        };
4844        let item_field = Arc::new(Field::new("item", Int64, true));
4845        let data_types = vec![
4846            DataType::List(item_field.clone()),
4847            DataType::LargeList(item_field.clone()),
4848            DataType::ListView(item_field.clone()),
4849            DataType::LargeListView(item_field.clone()),
4850            DataType::FixedSizeList(item_field, 2),
4851        ];
4852
4853        for data_type in data_types {
4854            let options = GetOptions::new()
4855                .with_as_type(Some(FieldRef::from(Field::new("result", data_type, true))))
4856                .with_cast_options(cast_options.clone());
4857
4858            let err = variant_get(&variant_array, options).unwrap_err();
4859            assert!(
4860                err.to_string()
4861                    .contains("Failed to extract list from variant"),
4862            );
4863        }
4864    }
4865
4866    #[test]
4867    fn test_variant_get_fixed_size_list_wrong_size() {
4868        let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2, 3]"]));
4869        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
4870        let item_field = Arc::new(Field::new("item", Int64, true));
4871
4872        // With `safe` set to true, size mismatch should return Null.
4873        let options = GetOptions::new()
4874            .with_as_type(Some(FieldRef::from(Field::new(
4875                "result",
4876                DataType::FixedSizeList(item_field.clone(), 2),
4877                true,
4878            ))))
4879            .with_cast_options(CastOptions {
4880                safe: true,
4881                ..Default::default()
4882            });
4883        let result = variant_get(&variant_array, options).unwrap();
4884        let fixed_size_list = result
4885            .as_any()
4886            .downcast_ref::<FixedSizeListArray>()
4887            .expect("Expected FixedSizeListArray");
4888        assert_eq!(fixed_size_list.len(), 1);
4889        assert!(fixed_size_list.is_null(0));
4890
4891        // With `safe` set to false, error should be raised on wrong sized fixed list.
4892        let options = GetOptions::new()
4893            .with_as_type(Some(FieldRef::from(Field::new(
4894                "result",
4895                DataType::FixedSizeList(item_field.clone(), 2),
4896                true,
4897            ))))
4898            .with_cast_options(CastOptions {
4899                safe: false,
4900                ..Default::default()
4901            });
4902        let err = variant_get(&variant_array, options).unwrap_err();
4903        assert!(
4904            err.to_string()
4905                .contains("Expected fixed size list of size 2, got size 3"),
4906            "got: {err}",
4907        );
4908    }
4909
4910    macro_rules! perfectly_shredded_preserves_top_level_nulls_test {
4911        ($name:ident, $result_type:expr, $typed_value:expr, $expected_array:expr) => {
4912            perfectly_shredded_preserves_top_level_nulls_test!(
4913                $name,
4914                $result_type,
4915                $typed_value,
4916                Some(NullBuffer::from(vec![true, false, true])),
4917                $expected_array
4918            );
4919        };
4920        ($name:ident, $result_type:expr, $typed_value:expr, $parent_nulls:expr, $expected_array:expr) => {
4921            #[test]
4922            fn $name() {
4923                let metadata = Arc::new(BinaryViewArray::from_iter_values(std::iter::repeat_n(
4924                    EMPTY_VARIANT_METADATA_BYTES,
4925                    3,
4926                )));
4927                let typed_value: ArrayRef = Arc::new($typed_value);
4928                let variant_array: ArrayRef =
4929                    VariantArray::from_parts(metadata, None, Some(typed_value), $parent_nulls)
4930                        .into();
4931
4932                let result = variant_get(
4933                    &variant_array,
4934                    GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4935                        "result",
4936                        $result_type,
4937                        true,
4938                    )))),
4939                )
4940                .unwrap();
4941
4942                let expected_array: ArrayRef = Arc::new($expected_array);
4943                assert_eq!(&result, &expected_array);
4944            }
4945        };
4946    }
4947
4948    perfectly_shredded_preserves_top_level_nulls_test!(
4949        test_variant_get_perfectly_shredded_integer_preserves_top_level_nulls,
4950        DataType::Int32,
4951        Int32Array::from(vec![Some(0_i32), Some(1_i32), Some(2_i32)]),
4952        Int32Array::from(vec![Some(0_i32), None, Some(2_i32)])
4953    );
4954
4955    perfectly_shredded_preserves_top_level_nulls_test!(
4956        test_variant_get_perfectly_shredded_integer_unions_child_and_top_level_nulls,
4957        DataType::Int32,
4958        Int32Array::from(vec![None, Some(1_i32), Some(2_i32)]),
4959        Some(NullBuffer::from(vec![true, false, true])),
4960        Int32Array::from(vec![None, None, Some(2_i32)])
4961    );
4962
4963    perfectly_shredded_preserves_top_level_nulls_test!(
4964        test_variant_get_perfectly_shredded_null_preserves_top_level_nulls,
4965        DataType::Null,
4966        NullArray::new(3),
4967        NullArray::new(3)
4968    );
4969
4970    perfectly_shredded_preserves_top_level_nulls_test!(
4971        test_variant_get_perfectly_shredded_binary_view_preserves_top_level_nulls,
4972        DataType::BinaryView,
4973        BinaryViewArray::from(vec![
4974            Some(b"Apache" as &[u8]),
4975            Some(b"masked-null" as &[u8]),
4976            Some(b"Parquet-variant" as &[u8]),
4977        ]),
4978        BinaryViewArray::from(vec![
4979            Some(b"Apache" as &[u8]),
4980            None,
4981            Some(b"Parquet-variant" as &[u8]),
4982        ])
4983    );
4984
4985    perfectly_shredded_preserves_top_level_nulls_test!(
4986        test_variant_get_perfectly_shredded_binary_preserves_top_level_nulls,
4987        DataType::Binary,
4988        BinaryArray::from(vec![
4989            Some(b"Apache" as &[u8]),
4990            Some(b"masked-null" as &[u8]),
4991            Some(b"Parquet-variant" as &[u8]),
4992        ]),
4993        BinaryArray::from(vec![
4994            Some(b"Apache" as &[u8]),
4995            None,
4996            Some(b"Parquet-variant" as &[u8]),
4997        ])
4998    );
4999
5000    perfectly_shredded_preserves_top_level_nulls_test!(
5001        test_variant_get_perfectly_shredded_decimal4_preserves_top_level_nulls,
5002        DataType::Decimal32(5, 2),
5003        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
5004            .with_precision_and_scale(5, 2)
5005            .unwrap(),
5006        Decimal32Array::from(vec![Some(12345), None, Some(-12342)])
5007            .with_precision_and_scale(5, 2)
5008            .unwrap()
5009    );
5010
5011    perfectly_shredded_preserves_top_level_nulls_test!(
5012        test_variant_get_perfectly_shredded_decimal8_preserves_top_level_nulls,
5013        DataType::Decimal64(10, 1),
5014        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
5015            .with_precision_and_scale(10, 1)
5016            .unwrap(),
5017        Decimal64Array::from(vec![Some(1234567809), None, Some(-1234561203)])
5018            .with_precision_and_scale(10, 1)
5019            .unwrap()
5020    );
5021
5022    perfectly_shredded_preserves_top_level_nulls_test!(
5023        test_variant_get_perfectly_shredded_decimal16_preserves_top_level_nulls,
5024        DataType::Decimal128(20, 3),
5025        Decimal128Array::from(vec![
5026            Some(i128::from_str("12345678901234567899").unwrap()),
5027            Some(i128::from_str("23445677483748324300").unwrap()),
5028            Some(i128::from_str("-12345678901234567899").unwrap()),
5029        ])
5030        .with_precision_and_scale(20, 3)
5031        .unwrap(),
5032        Decimal128Array::from(vec![
5033            Some(i128::from_str("12345678901234567899").unwrap()),
5034            None,
5035            Some(i128::from_str("-12345678901234567899").unwrap()),
5036        ])
5037        .with_precision_and_scale(20, 3)
5038        .unwrap()
5039    );
5040}