parquet_variant_compute/
shred_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for shredding VariantArray with a given schema.
19
20use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
21use crate::variant_to_arrow::{
22    PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
23};
24use crate::{VariantArray, VariantValueArrayBuilder};
25use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
26use arrow::buffer::NullBuffer;
27use arrow::compute::CastOptions;
28use arrow::datatypes::{DataType, Fields};
29use arrow::error::{ArrowError, Result};
30use parquet_variant::{Variant, VariantBuilderExt};
31
32use indexmap::IndexMap;
33use std::sync::Arc;
34
35/// Shreds the input binary variant using a target shredding schema derived from the requested data type.
36///
37/// For example, requesting `DataType::Int64` would produce an output variant array with the schema:
38///
39/// ```text
40/// {
41///    metadata: BINARY,
42///    value: BINARY,
43///    typed_value: LONG,
44/// }
45/// ```
46///
47/// Similarly, requesting `DataType::Struct` with two integer fields `a` and `b` would produce an
48/// output variant array with the schema:
49///
50/// ```text
51/// {
52///   metadata: BINARY,
53///   value: BINARY,
54///   typed_value: {
55///     a: {
56///       value: BINARY,
57///       typed_value: INT,
58///     },
59///     b: {
60///       value: BINARY,
61///       typed_value: INT,
62///     },
63///   }
64/// }
65/// ```
66pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result<VariantArray> {
67    if array.typed_value_field().is_some() {
68        return Err(ArrowError::InvalidArgumentError(
69            "Input is already shredded".to_string(),
70        ));
71    }
72
73    if array.value_field().is_none() {
74        // all-null case -- nothing to do.
75        return Ok(array.clone());
76    };
77
78    let cast_options = CastOptions::default();
79    let mut builder = make_variant_to_shredded_variant_arrow_row_builder(
80        as_type,
81        &cast_options,
82        array.len(),
83        true,
84    )?;
85    for i in 0..array.len() {
86        if array.is_null(i) {
87            builder.append_null()?;
88        } else {
89            builder.append_value(array.value(i))?;
90        }
91    }
92    let (value, typed_value, nulls) = builder.finish()?;
93    Ok(VariantArray::from_parts(
94        array.metadata_field().clone(),
95        Some(value),
96        Some(typed_value),
97        nulls,
98    ))
99}
100
101pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
102    data_type: &'a DataType,
103    cast_options: &'a CastOptions,
104    capacity: usize,
105    top_level: bool,
106) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
107    let builder = match data_type {
108        DataType::Struct(fields) => {
109            let typed_value_builder = VariantToShreddedObjectVariantRowBuilder::try_new(
110                fields,
111                cast_options,
112                capacity,
113                top_level,
114            )?;
115            VariantToShreddedVariantRowBuilder::Object(typed_value_builder)
116        }
117        DataType::List(_)
118        | DataType::LargeList(_)
119        | DataType::ListView(_)
120        | DataType::LargeListView(_)
121        | DataType::FixedSizeList(..) => {
122            return Err(ArrowError::NotYetImplemented(
123                "Shredding variant array values as arrow lists".to_string(),
124            ));
125        }
126        _ => {
127            let builder =
128                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
129            let typed_value_builder =
130                VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level);
131            VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
132        }
133    };
134    Ok(builder)
135}
136
137pub(crate) enum VariantToShreddedVariantRowBuilder<'a> {
138    Primitive(VariantToShreddedPrimitiveVariantRowBuilder<'a>),
139    Object(VariantToShreddedObjectVariantRowBuilder<'a>),
140}
141impl<'a> VariantToShreddedVariantRowBuilder<'a> {
142    pub fn append_null(&mut self) -> Result<()> {
143        use VariantToShreddedVariantRowBuilder::*;
144        match self {
145            Primitive(b) => b.append_null(),
146            Object(b) => b.append_null(),
147        }
148    }
149
150    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
151        use VariantToShreddedVariantRowBuilder::*;
152        match self {
153            Primitive(b) => b.append_value(value),
154            Object(b) => b.append_value(value),
155        }
156    }
157
158    pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
159        use VariantToShreddedVariantRowBuilder::*;
160        match self {
161            Primitive(b) => b.finish(),
162            Object(b) => b.finish(),
163        }
164    }
165}
166
167/// A top-level variant shredder -- appending NULL produces typed_value=NULL and value=Variant::Null
168pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> {
169    value_builder: VariantValueArrayBuilder,
170    typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
171    nulls: NullBufferBuilder,
172    top_level: bool,
173}
174
175impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
176    pub(crate) fn new(
177        typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
178        capacity: usize,
179        top_level: bool,
180    ) -> Self {
181        Self {
182            value_builder: VariantValueArrayBuilder::new(capacity),
183            typed_value_builder,
184            nulls: NullBufferBuilder::new(capacity),
185            top_level,
186        }
187    }
188    fn append_null(&mut self) -> Result<()> {
189        // Only the top-level struct that represents the variant can be nullable; object fields and
190        // array elements are non-nullable.
191        self.nulls.append(!self.top_level);
192        self.value_builder.append_null();
193        self.typed_value_builder.append_null()
194    }
195    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
196        self.nulls.append_non_null();
197        if self.typed_value_builder.append_value(&value)? {
198            self.value_builder.append_null();
199        } else {
200            self.value_builder.append_value(value);
201        }
202        Ok(true)
203    }
204    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
205        Ok((
206            self.value_builder.build()?,
207            self.typed_value_builder.finish()?,
208            self.nulls.finish(),
209        ))
210    }
211}
212
213pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
214    value_builder: VariantValueArrayBuilder,
215    typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,
216    typed_value_nulls: NullBufferBuilder,
217    nulls: NullBufferBuilder,
218    top_level: bool,
219}
220
221impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
222    fn try_new(
223        fields: &'a Fields,
224        cast_options: &'a CastOptions,
225        capacity: usize,
226        top_level: bool,
227    ) -> Result<Self> {
228        let typed_value_builders = fields.iter().map(|field| {
229            let builder = make_variant_to_shredded_variant_arrow_row_builder(
230                field.data_type(),
231                cast_options,
232                capacity,
233                false,
234            )?;
235            Ok((field.name().as_str(), builder))
236        });
237        Ok(Self {
238            value_builder: VariantValueArrayBuilder::new(capacity),
239            typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
240            typed_value_nulls: NullBufferBuilder::new(capacity),
241            nulls: NullBufferBuilder::new(capacity),
242            top_level,
243        })
244    }
245
246    fn append_null(&mut self) -> Result<()> {
247        // Only the top-level struct that represents the variant can be nullable; object fields and
248        // array elements are non-nullable.
249        self.nulls.append(!self.top_level);
250        self.value_builder.append_null();
251        self.typed_value_nulls.append_null();
252        for (_, typed_value_builder) in &mut self.typed_value_builders {
253            typed_value_builder.append_null()?;
254        }
255        Ok(())
256    }
257    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
258        let Variant::Object(ref obj) = value else {
259            // Not an object => fall back
260            self.nulls.append_non_null();
261            self.value_builder.append_value(value);
262            self.typed_value_nulls.append_null();
263            for (_, typed_value_builder) in &mut self.typed_value_builders {
264                typed_value_builder.append_null()?;
265            }
266            return Ok(false);
267        };
268
269        // Route the object's fields by name as either shredded or unshredded
270        let mut builder = self.value_builder.builder_ext(value.metadata());
271        let mut object_builder = builder.try_new_object()?;
272        let mut seen = std::collections::HashSet::new();
273        let mut partially_shredded = false;
274        for (field_name, value) in obj.iter() {
275            match self.typed_value_builders.get_mut(field_name) {
276                Some(typed_value_builder) => {
277                    typed_value_builder.append_value(value)?;
278                    seen.insert(field_name);
279                }
280                None => {
281                    object_builder.insert_bytes(field_name, value);
282                    partially_shredded = true;
283                }
284            }
285        }
286
287        // Handle missing fields
288        for (field_name, typed_value_builder) in &mut self.typed_value_builders {
289            if !seen.contains(field_name) {
290                typed_value_builder.append_null()?;
291            }
292        }
293
294        // Only emit the value if it captured any unshredded object fields
295        if partially_shredded {
296            object_builder.finish();
297        } else {
298            drop(object_builder);
299            self.value_builder.append_null();
300        }
301
302        self.typed_value_nulls.append_non_null();
303        self.nulls.append_non_null();
304        Ok(true)
305    }
306    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
307        let mut builder = StructArrayBuilder::new();
308        for (field_name, typed_value_builder) in self.typed_value_builders {
309            let (value, typed_value, nulls) = typed_value_builder.finish()?;
310            let array =
311                ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
312            builder = builder.with_field(field_name, ArrayRef::from(array), false);
313        }
314        if let Some(nulls) = self.typed_value_nulls.finish() {
315            builder = builder.with_nulls(nulls);
316        }
317        Ok((
318            self.value_builder.build()?,
319            Arc::new(builder.build()),
320            self.nulls.finish(),
321        ))
322    }
323}
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328    use crate::VariantArrayBuilder;
329    use arrow::array::{Array, Float64Array, Int64Array};
330    use arrow::datatypes::{DataType, Field, Fields};
331    use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant, VariantBuilder};
332    use std::sync::Arc;
333
334    fn create_test_variant_array(values: Vec<Option<Variant<'_, '_>>>) -> VariantArray {
335        let mut builder = VariantArrayBuilder::new(values.len());
336        for value in values {
337            match value {
338                Some(v) => builder.append_variant(v),
339                None => builder.append_null(),
340            }
341        }
342        builder.build()
343    }
344
345    #[test]
346    fn test_already_shredded_input_error() {
347        // Create a VariantArray that already has typed_value_field
348        // First create a valid VariantArray, then extract its parts to construct a shredded one
349        let temp_array = create_test_variant_array(vec![Some(Variant::from("test"))]);
350        let metadata = temp_array.metadata_field().clone();
351        let value = temp_array.value_field().unwrap().clone();
352        let typed_value = Arc::new(Int64Array::from(vec![42])) as ArrayRef;
353
354        let shredded_array =
355            VariantArray::from_parts(metadata, Some(value), Some(typed_value), None);
356
357        let result = shred_variant(&shredded_array, &DataType::Int64);
358        assert!(matches!(
359            result.unwrap_err(),
360            ArrowError::InvalidArgumentError(_)
361        ));
362    }
363
364    #[test]
365    fn test_all_null_input() {
366        // Create VariantArray with no value field (all null case)
367        let metadata = BinaryViewArray::from_iter_values([&[1u8, 0u8]]); // minimal valid metadata
368        let all_null_array = VariantArray::from_parts(metadata, None, None, None);
369        let result = shred_variant(&all_null_array, &DataType::Int64).unwrap();
370
371        // Should return array with no value/typed_value fields
372        assert!(result.value_field().is_none());
373        assert!(result.typed_value_field().is_none());
374    }
375
376    #[test]
377    fn test_unsupported_list_schema() {
378        let input = create_test_variant_array(vec![Some(Variant::from(42))]);
379        let list_schema = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
380        shred_variant(&input, &list_schema).expect_err("unsupported");
381    }
382
383    #[test]
384    fn test_primitive_shredding_comprehensive() {
385        // Test mixed scenarios in a single array
386        let input = create_test_variant_array(vec![
387            Some(Variant::from(42i64)),   // successful shred
388            Some(Variant::from("hello")), // failed shred (string)
389            Some(Variant::from(100i64)),  // successful shred
390            None,                         // array-level null
391            Some(Variant::Null),          // variant null
392            Some(Variant::from(3i8)),     // successful shred (int8->int64 conversion)
393        ]);
394
395        let result = shred_variant(&input, &DataType::Int64).unwrap();
396
397        // Verify structure
398        let metadata_field = result.metadata_field();
399        let value_field = result.value_field().unwrap();
400        let typed_value_field = result
401            .typed_value_field()
402            .unwrap()
403            .as_any()
404            .downcast_ref::<Int64Array>()
405            .unwrap();
406
407        // Check specific outcomes for each row
408        assert_eq!(result.len(), 6);
409
410        // Row 0: 42 -> should shred successfully
411        assert!(!result.is_null(0));
412        assert!(value_field.is_null(0)); // value should be null when shredded
413        assert!(!typed_value_field.is_null(0));
414        assert_eq!(typed_value_field.value(0), 42);
415
416        // Row 1: "hello" -> should fail to shred
417        assert!(!result.is_null(1));
418        assert!(!value_field.is_null(1)); // value should contain original
419        assert!(typed_value_field.is_null(1)); // typed_value should be null
420        assert_eq!(
421            Variant::new(metadata_field.value(1), value_field.value(1)),
422            Variant::from("hello")
423        );
424
425        // Row 2: 100 -> should shred successfully
426        assert!(!result.is_null(2));
427        assert!(value_field.is_null(2));
428        assert_eq!(typed_value_field.value(2), 100);
429
430        // Row 3: array null -> should be null in result
431        assert!(result.is_null(3));
432
433        // Row 4: Variant::Null -> should not shred (it's a null variant, not an integer)
434        assert!(!result.is_null(4));
435        assert!(!value_field.is_null(4)); // should contain Variant::Null
436        assert_eq!(
437            Variant::new(metadata_field.value(4), value_field.value(4)),
438            Variant::Null
439        );
440        assert!(typed_value_field.is_null(4));
441
442        // Row 5: 3i8 -> should shred successfully (int8->int64 conversion)
443        assert!(!result.is_null(5));
444        assert!(value_field.is_null(5)); // value should be null when shredded
445        assert!(!typed_value_field.is_null(5));
446        assert_eq!(typed_value_field.value(5), 3);
447    }
448
449    #[test]
450    fn test_primitive_different_target_types() {
451        let input = create_test_variant_array(vec![
452            Some(Variant::from(42i32)),
453            Some(Variant::from(3.15f64)),
454            Some(Variant::from("not_a_number")),
455        ]);
456
457        // Test Int32 target
458        let result_int32 = shred_variant(&input, &DataType::Int32).unwrap();
459        let typed_value_int32 = result_int32
460            .typed_value_field()
461            .unwrap()
462            .as_any()
463            .downcast_ref::<arrow::array::Int32Array>()
464            .unwrap();
465        assert_eq!(typed_value_int32.value(0), 42);
466        assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32
467        assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32
468
469        // Test Float64 target
470        let result_float64 = shred_variant(&input, &DataType::Float64).unwrap();
471        let typed_value_float64 = result_float64
472            .typed_value_field()
473            .unwrap()
474            .as_any()
475            .downcast_ref::<Float64Array>()
476            .unwrap();
477        assert_eq!(typed_value_float64.value(0), 42.0); // int converts to float
478        assert_eq!(typed_value_float64.value(1), 3.15);
479        assert!(typed_value_float64.is_null(2)); // string doesn't convert
480    }
481
482    #[test]
483    fn test_object_shredding_comprehensive() {
484        let mut builder = VariantArrayBuilder::new(7);
485
486        // Row 0: Fully shredded object
487        builder
488            .new_object()
489            .with_field("score", 95.5f64)
490            .with_field("age", 30i64)
491            .finish();
492
493        // Row 1: Partially shredded object (extra email field)
494        builder
495            .new_object()
496            .with_field("score", 87.2f64)
497            .with_field("age", 25i64)
498            .with_field("email", "bob@example.com")
499            .finish();
500
501        // Row 2: Missing field (no score)
502        builder.new_object().with_field("age", 35i64).finish();
503
504        // Row 3: Type mismatch (score is string, age is string)
505        builder
506            .new_object()
507            .with_field("score", "ninety-five")
508            .with_field("age", "thirty")
509            .finish();
510
511        // Row 4: Non-object
512        builder.append_variant(Variant::from("not an object"));
513
514        // Row 5: Empty object
515        builder.new_object().finish();
516
517        // Row 6: Null
518        builder.append_null();
519
520        // Row 7: Object with only "wrong" fields
521        builder.new_object().with_field("foo", 10).finish();
522
523        // Row 8: Object with one "right" and one "wrong" field
524        builder
525            .new_object()
526            .with_field("score", 66.67f64)
527            .with_field("foo", 10)
528            .finish();
529
530        let input = builder.build();
531
532        // Create target schema: struct<score: float64, age: int64>
533        // Both types are supported for shredding
534        let fields = Fields::from(vec![
535            Field::new("score", DataType::Float64, true),
536            Field::new("age", DataType::Int64, true),
537        ]);
538        let target_schema = DataType::Struct(fields);
539
540        let result = shred_variant(&input, &target_schema).unwrap();
541
542        // Verify structure
543        assert!(result.value_field().is_some());
544        assert!(result.typed_value_field().is_some());
545        assert_eq!(result.len(), 9);
546
547        let metadata = result.metadata_field();
548
549        let value = result.value_field().unwrap();
550        let typed_value = result
551            .typed_value_field()
552            .unwrap()
553            .as_any()
554            .downcast_ref::<arrow::array::StructArray>()
555            .unwrap();
556
557        // Extract score and age fields from typed_value struct
558        let score_field =
559            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("score").unwrap())
560                .unwrap();
561        let age_field =
562            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap();
563
564        let score_value = score_field
565            .value_field()
566            .unwrap()
567            .as_any()
568            .downcast_ref::<BinaryViewArray>()
569            .unwrap();
570        let score_typed_value = score_field
571            .typed_value_field()
572            .unwrap()
573            .as_any()
574            .downcast_ref::<Float64Array>()
575            .unwrap();
576        let age_value = age_field
577            .value_field()
578            .unwrap()
579            .as_any()
580            .downcast_ref::<BinaryViewArray>()
581            .unwrap();
582        let age_typed_value = age_field
583            .typed_value_field()
584            .unwrap()
585            .as_any()
586            .downcast_ref::<Int64Array>()
587            .unwrap();
588
589        // Set up exhaustive checking of all shredded columns and their nulls/values
590        struct ShreddedValue<'m, 'v, T> {
591            value: Option<Variant<'m, 'v>>,
592            typed_value: Option<T>,
593        }
594        struct ShreddedStruct<'m, 'v> {
595            score: ShreddedValue<'m, 'v, f64>,
596            age: ShreddedValue<'m, 'v, i64>,
597        }
598        fn get_value<'m, 'v>(
599            i: usize,
600            metadata: &'m BinaryViewArray,
601            value: &'v BinaryViewArray,
602        ) -> Variant<'m, 'v> {
603            Variant::new(metadata.value(i), value.value(i))
604        }
605        let expect = |i, expected_result: Option<ShreddedValue<ShreddedStruct>>| {
606            match expected_result {
607                Some(ShreddedValue {
608                    value: expected_value,
609                    typed_value: expected_typed_value,
610                }) => {
611                    assert!(result.is_valid(i));
612                    match expected_value {
613                        Some(expected_value) => {
614                            assert!(value.is_valid(i));
615                            assert_eq!(expected_value, get_value(i, metadata, value));
616                        }
617                        None => {
618                            assert!(value.is_null(i));
619                        }
620                    }
621                    match expected_typed_value {
622                        Some(ShreddedStruct {
623                            score: expected_score,
624                            age: expected_age,
625                        }) => {
626                            assert!(typed_value.is_valid(i));
627                            assert!(score_field.is_valid(i)); // non-nullable
628                            assert!(age_field.is_valid(i)); // non-nullable
629                            match expected_score.value {
630                                Some(expected_score_value) => {
631                                    assert!(score_value.is_valid(i));
632                                    assert_eq!(
633                                        expected_score_value,
634                                        get_value(i, metadata, score_value)
635                                    );
636                                }
637                                None => {
638                                    assert!(score_value.is_null(i));
639                                }
640                            }
641                            match expected_score.typed_value {
642                                Some(expected_score) => {
643                                    assert!(score_typed_value.is_valid(i));
644                                    assert_eq!(expected_score, score_typed_value.value(i));
645                                }
646                                None => {
647                                    assert!(score_typed_value.is_null(i));
648                                }
649                            }
650                            match expected_age.value {
651                                Some(expected_age_value) => {
652                                    assert!(age_value.is_valid(i));
653                                    assert_eq!(
654                                        expected_age_value,
655                                        get_value(i, metadata, age_value)
656                                    );
657                                }
658                                None => {
659                                    assert!(age_value.is_null(i));
660                                }
661                            }
662                            match expected_age.typed_value {
663                                Some(expected_age) => {
664                                    assert!(age_typed_value.is_valid(i));
665                                    assert_eq!(expected_age, age_typed_value.value(i));
666                                }
667                                None => {
668                                    assert!(age_typed_value.is_null(i));
669                                }
670                            }
671                        }
672                        None => {
673                            assert!(typed_value.is_null(i));
674                        }
675                    }
676                }
677                None => {
678                    assert!(result.is_null(i));
679                }
680            };
681        };
682
683        // Row 0: Fully shredded - both fields shred successfully
684        expect(
685            0,
686            Some(ShreddedValue {
687                value: None,
688                typed_value: Some(ShreddedStruct {
689                    score: ShreddedValue {
690                        value: None,
691                        typed_value: Some(95.5),
692                    },
693                    age: ShreddedValue {
694                        value: None,
695                        typed_value: Some(30),
696                    },
697                }),
698            }),
699        );
700
701        // Row 1: Partially shredded - value contains extra email field
702        let mut builder = VariantBuilder::new();
703        builder
704            .new_object()
705            .with_field("email", "bob@example.com")
706            .finish();
707        let (m, v) = builder.finish();
708        let expected_value = Variant::new(&m, &v);
709
710        expect(
711            1,
712            Some(ShreddedValue {
713                value: Some(expected_value),
714                typed_value: Some(ShreddedStruct {
715                    score: ShreddedValue {
716                        value: None,
717                        typed_value: Some(87.2),
718                    },
719                    age: ShreddedValue {
720                        value: None,
721                        typed_value: Some(25),
722                    },
723                }),
724            }),
725        );
726
727        // Row 2: Fully shredded -- missing score field
728        expect(
729            2,
730            Some(ShreddedValue {
731                value: None,
732                typed_value: Some(ShreddedStruct {
733                    score: ShreddedValue {
734                        value: None,
735                        typed_value: None,
736                    },
737                    age: ShreddedValue {
738                        value: None,
739                        typed_value: Some(35),
740                    },
741                }),
742            }),
743        );
744
745        // Row 3: Type mismatches - both score and age are strings
746        expect(
747            3,
748            Some(ShreddedValue {
749                value: None,
750                typed_value: Some(ShreddedStruct {
751                    score: ShreddedValue {
752                        value: Some(Variant::from("ninety-five")),
753                        typed_value: None,
754                    },
755                    age: ShreddedValue {
756                        value: Some(Variant::from("thirty")),
757                        typed_value: None,
758                    },
759                }),
760            }),
761        );
762
763        // Row 4: Non-object - falls back to value field
764        expect(
765            4,
766            Some(ShreddedValue {
767                value: Some(Variant::from("not an object")),
768                typed_value: None,
769            }),
770        );
771
772        // Row 5: Empty object
773        expect(
774            5,
775            Some(ShreddedValue {
776                value: None,
777                typed_value: Some(ShreddedStruct {
778                    score: ShreddedValue {
779                        value: None,
780                        typed_value: None,
781                    },
782                    age: ShreddedValue {
783                        value: None,
784                        typed_value: None,
785                    },
786                }),
787            }),
788        );
789
790        // Row 6: Null
791        expect(6, None);
792
793        // Helper to correctly create a variant object using a row's existing metadata
794        let object_with_foo_field = |i| {
795            use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
796            let metadata = VariantMetadata::new(metadata.value(i));
797            let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
798            let mut value_builder = ValueBuilder::new();
799            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
800            ObjectBuilder::new(state, false)
801                .with_field("foo", 10)
802                .finish();
803            (metadata, value_builder.into_inner())
804        };
805
806        // Row 7: Object with only a "wrong" field
807        let (m, v) = object_with_foo_field(7);
808        expect(
809            7,
810            Some(ShreddedValue {
811                value: Some(Variant::new_with_metadata(m, &v)),
812                typed_value: Some(ShreddedStruct {
813                    score: ShreddedValue {
814                        value: None,
815                        typed_value: None,
816                    },
817                    age: ShreddedValue {
818                        value: None,
819                        typed_value: None,
820                    },
821                }),
822            }),
823        );
824
825        // Row 8: Object with one "wrong" and one "right" field
826        let (m, v) = object_with_foo_field(8);
827        expect(
828            8,
829            Some(ShreddedValue {
830                value: Some(Variant::new_with_metadata(m, &v)),
831                typed_value: Some(ShreddedStruct {
832                    score: ShreddedValue {
833                        value: None,
834                        typed_value: Some(66.67),
835                    },
836                    age: ShreddedValue {
837                        value: None,
838                        typed_value: None,
839                    },
840                }),
841            }),
842        );
843    }
844
845    #[test]
846    fn test_object_different_schemas() {
847        // Create object with multiple fields
848        let mut builder = VariantArrayBuilder::new(1);
849        builder
850            .new_object()
851            .with_field("id", 123i32)
852            .with_field("age", 25i64)
853            .with_field("score", 95.5f64)
854            .finish();
855        let input = builder.build();
856
857        // Test with schema containing only id field
858        let schema1 = DataType::Struct(Fields::from(vec![Field::new("id", DataType::Int32, true)]));
859        let result1 = shred_variant(&input, &schema1).unwrap();
860        let value_field1 = result1.value_field().unwrap();
861        assert!(!value_field1.is_null(0)); // should contain {"age": 25, "score": 95.5}
862
863        // Test with schema containing id and age fields
864        let schema2 = DataType::Struct(Fields::from(vec![
865            Field::new("id", DataType::Int32, true),
866            Field::new("age", DataType::Int64, true),
867        ]));
868        let result2 = shred_variant(&input, &schema2).unwrap();
869        let value_field2 = result2.value_field().unwrap();
870        assert!(!value_field2.is_null(0)); // should contain {"score": 95.5}
871
872        // Test with schema containing all fields
873        let schema3 = DataType::Struct(Fields::from(vec![
874            Field::new("id", DataType::Int32, true),
875            Field::new("age", DataType::Int64, true),
876            Field::new("score", DataType::Float64, true),
877        ]));
878        let result3 = shred_variant(&input, &schema3).unwrap();
879        let value_field3 = result3.value_field().unwrap();
880        assert!(value_field3.is_null(0)); // fully shredded, no remaining fields
881    }
882
883    #[test]
884    fn test_spec_compliance() {
885        let input = create_test_variant_array(vec![
886            Some(Variant::from(42i64)),
887            Some(Variant::from("hello")),
888        ]);
889
890        let result = shred_variant(&input, &DataType::Int64).unwrap();
891
892        // Test field access by name (not position)
893        let inner_struct = result.inner();
894        assert!(inner_struct.column_by_name("metadata").is_some());
895        assert!(inner_struct.column_by_name("value").is_some());
896        assert!(inner_struct.column_by_name("typed_value").is_some());
897
898        // Test metadata preservation
899        assert_eq!(result.metadata_field().len(), input.metadata_field().len());
900        // The metadata should be the same reference (cheap clone)
901        // Note: BinaryViewArray doesn't have a .values() method, so we compare the arrays directly
902        assert_eq!(result.metadata_field().len(), input.metadata_field().len());
903
904        // Test output structure correctness
905        assert_eq!(result.len(), input.len());
906        assert!(result.value_field().is_some());
907        assert!(result.typed_value_field().is_some());
908
909        // For primitive shredding, verify that value and typed_value are never both non-null
910        // (This rule applies to primitives; for objects, both can be non-null for partial shredding)
911        let value_field = result.value_field().unwrap();
912        let typed_value_field = result
913            .typed_value_field()
914            .unwrap()
915            .as_any()
916            .downcast_ref::<Int64Array>()
917            .unwrap();
918
919        for i in 0..result.len() {
920            if !result.is_null(i) {
921                let value_is_null = value_field.is_null(i);
922                let typed_value_is_null = typed_value_field.is_null(i);
923                // For primitive shredding, at least one should be null
924                assert!(
925                    value_is_null || typed_value_is_null,
926                    "Row {}: both value and typed_value are non-null for primitive shredding",
927                    i
928                );
929            }
930        }
931    }
932}