parquet_variant_compute/
shred_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for shredding VariantArray with a given schema.
19
20use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
21use crate::variant_to_arrow::{
22    PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
23};
24use crate::{VariantArray, VariantValueArrayBuilder};
25use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
26use arrow::buffer::NullBuffer;
27use arrow::compute::CastOptions;
28use arrow::datatypes::{DataType, Fields};
29use arrow::error::{ArrowError, Result};
30use parquet_variant::{Variant, VariantBuilderExt};
31
32use indexmap::IndexMap;
33use std::sync::Arc;
34
35/// Shreds the input binary variant using a target shredding schema derived from the requested data type.
36///
37/// For example, requesting `DataType::Int64` would produce an output variant array with the schema:
38///
39/// ```text
40/// {
41///    metadata: BINARY,
42///    value: BINARY,
43///    typed_value: LONG,
44/// }
45/// ```
46///
47/// Similarly, requesting `DataType::Struct` with two integer fields `a` and `b` would produce an
48/// output variant array with the schema:
49///
50/// ```text
51/// {
52///   metadata: BINARY,
53///   value: BINARY,
54///   typed_value: {
55///     a: {
56///       value: BINARY,
57///       typed_value: INT,
58///     },
59///     b: {
60///       value: BINARY,
61///       typed_value: INT,
62///     },
63///   }
64/// }
65/// ```
66pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result<VariantArray> {
67    if array.typed_value_field().is_some() {
68        return Err(ArrowError::InvalidArgumentError(
69            "Input is already shredded".to_string(),
70        ));
71    }
72
73    if array.value_field().is_none() {
74        // all-null case -- nothing to do.
75        return Ok(array.clone());
76    };
77
78    let cast_options = CastOptions::default();
79    let mut builder = make_variant_to_shredded_variant_arrow_row_builder(
80        as_type,
81        &cast_options,
82        array.len(),
83        true,
84    )?;
85    for i in 0..array.len() {
86        if array.is_null(i) {
87            builder.append_null()?;
88        } else {
89            builder.append_value(array.value(i))?;
90        }
91    }
92    let (value, typed_value, nulls) = builder.finish()?;
93    Ok(VariantArray::from_parts(
94        array.metadata_field().clone(),
95        Some(value),
96        Some(typed_value),
97        nulls,
98    ))
99}
100
101pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
102    data_type: &'a DataType,
103    cast_options: &'a CastOptions,
104    capacity: usize,
105    top_level: bool,
106) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
107    let builder = match data_type {
108        DataType::Struct(fields) => {
109            let typed_value_builder = VariantToShreddedObjectVariantRowBuilder::try_new(
110                fields,
111                cast_options,
112                capacity,
113                top_level,
114            )?;
115            VariantToShreddedVariantRowBuilder::Object(typed_value_builder)
116        }
117        DataType::List(_)
118        | DataType::LargeList(_)
119        | DataType::ListView(_)
120        | DataType::LargeListView(_)
121        | DataType::FixedSizeList(..) => {
122            return Err(ArrowError::NotYetImplemented(
123                "Shredding variant array values as arrow lists".to_string(),
124            ));
125        }
126        _ => {
127            let builder =
128                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
129            let typed_value_builder =
130                VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level);
131            VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
132        }
133    };
134    Ok(builder)
135}
136
137pub(crate) enum VariantToShreddedVariantRowBuilder<'a> {
138    Primitive(VariantToShreddedPrimitiveVariantRowBuilder<'a>),
139    Object(VariantToShreddedObjectVariantRowBuilder<'a>),
140}
141impl<'a> VariantToShreddedVariantRowBuilder<'a> {
142    pub fn append_null(&mut self) -> Result<()> {
143        use VariantToShreddedVariantRowBuilder::*;
144        match self {
145            Primitive(b) => b.append_null(),
146            Object(b) => b.append_null(),
147        }
148    }
149
150    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
151        use VariantToShreddedVariantRowBuilder::*;
152        match self {
153            Primitive(b) => b.append_value(value),
154            Object(b) => b.append_value(value),
155        }
156    }
157
158    pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
159        use VariantToShreddedVariantRowBuilder::*;
160        match self {
161            Primitive(b) => b.finish(),
162            Object(b) => b.finish(),
163        }
164    }
165}
166
167/// A top-level variant shredder -- appending NULL produces typed_value=NULL and value=Variant::Null
168pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> {
169    value_builder: VariantValueArrayBuilder,
170    typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
171    nulls: NullBufferBuilder,
172    top_level: bool,
173}
174
175impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
176    pub(crate) fn new(
177        typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
178        capacity: usize,
179        top_level: bool,
180    ) -> Self {
181        Self {
182            value_builder: VariantValueArrayBuilder::new(capacity),
183            typed_value_builder,
184            nulls: NullBufferBuilder::new(capacity),
185            top_level,
186        }
187    }
188    fn append_null(&mut self) -> Result<()> {
189        // Only the top-level struct that represents the variant can be nullable; object fields and
190        // array elements are non-nullable.
191        self.nulls.append(!self.top_level);
192        self.value_builder.append_null();
193        self.typed_value_builder.append_null()
194    }
195    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
196        self.nulls.append_non_null();
197        if self.typed_value_builder.append_value(&value)? {
198            self.value_builder.append_null();
199        } else {
200            self.value_builder.append_value(value);
201        }
202        Ok(true)
203    }
204    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
205        Ok((
206            self.value_builder.build()?,
207            self.typed_value_builder.finish()?,
208            self.nulls.finish(),
209        ))
210    }
211}
212
213pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
214    value_builder: VariantValueArrayBuilder,
215    typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,
216    typed_value_nulls: NullBufferBuilder,
217    nulls: NullBufferBuilder,
218    top_level: bool,
219}
220
221impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
222    fn try_new(
223        fields: &'a Fields,
224        cast_options: &'a CastOptions,
225        capacity: usize,
226        top_level: bool,
227    ) -> Result<Self> {
228        let typed_value_builders = fields.iter().map(|field| {
229            let builder = make_variant_to_shredded_variant_arrow_row_builder(
230                field.data_type(),
231                cast_options,
232                capacity,
233                false,
234            )?;
235            Ok((field.name().as_str(), builder))
236        });
237        Ok(Self {
238            value_builder: VariantValueArrayBuilder::new(capacity),
239            typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
240            typed_value_nulls: NullBufferBuilder::new(capacity),
241            nulls: NullBufferBuilder::new(capacity),
242            top_level,
243        })
244    }
245
246    fn append_null(&mut self) -> Result<()> {
247        // Only the top-level struct that represents the variant can be nullable; object fields and
248        // array elements are non-nullable.
249        self.nulls.append(!self.top_level);
250        self.value_builder.append_null();
251        self.typed_value_nulls.append_null();
252        for (_, typed_value_builder) in &mut self.typed_value_builders {
253            typed_value_builder.append_null()?;
254        }
255        Ok(())
256    }
257    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
258        let Variant::Object(ref obj) = value else {
259            // Not an object => fall back
260            self.nulls.append_non_null();
261            self.value_builder.append_value(value);
262            self.typed_value_nulls.append_null();
263            for (_, typed_value_builder) in &mut self.typed_value_builders {
264                typed_value_builder.append_null()?;
265            }
266            return Ok(false);
267        };
268
269        // Route the object's fields by name as either shredded or unshredded
270        let mut builder = self.value_builder.builder_ext(value.metadata());
271        let mut object_builder = builder.try_new_object()?;
272        let mut seen = std::collections::HashSet::new();
273        let mut partially_shredded = false;
274        for (field_name, value) in obj.iter() {
275            match self.typed_value_builders.get_mut(field_name) {
276                Some(typed_value_builder) => {
277                    typed_value_builder.append_value(value)?;
278                    seen.insert(field_name);
279                }
280                None => {
281                    object_builder.insert_bytes(field_name, value);
282                    partially_shredded = true;
283                }
284            }
285        }
286
287        // Handle missing fields
288        for (field_name, typed_value_builder) in &mut self.typed_value_builders {
289            if !seen.contains(field_name) {
290                typed_value_builder.append_null()?;
291            }
292        }
293
294        // Only emit the value if it captured any unshredded object fields
295        if partially_shredded {
296            object_builder.finish();
297        } else {
298            drop(object_builder);
299            self.value_builder.append_null();
300        }
301
302        self.typed_value_nulls.append_non_null();
303        self.nulls.append_non_null();
304        Ok(true)
305    }
306    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
307        let mut builder = StructArrayBuilder::new();
308        for (field_name, typed_value_builder) in self.typed_value_builders {
309            let (value, typed_value, nulls) = typed_value_builder.finish()?;
310            let array =
311                ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
312            builder = builder.with_field(field_name, ArrayRef::from(array), false);
313        }
314        if let Some(nulls) = self.typed_value_nulls.finish() {
315            builder = builder.with_nulls(nulls);
316        }
317        Ok((
318            self.value_builder.build()?,
319            Arc::new(builder.build()),
320            self.nulls.finish(),
321        ))
322    }
323}
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328    use crate::VariantArrayBuilder;
329    use arrow::array::{Array, Float64Array, Int64Array};
330    use arrow::datatypes::{DataType, Field, Fields};
331    use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant, VariantBuilder};
332    use std::sync::Arc;
333
334    #[test]
335    fn test_already_shredded_input_error() {
336        // Create a VariantArray that already has typed_value_field
337        // First create a valid VariantArray, then extract its parts to construct a shredded one
338        let temp_array = VariantArray::from_iter(vec![Some(Variant::from("test"))]);
339        let metadata = temp_array.metadata_field().clone();
340        let value = temp_array.value_field().unwrap().clone();
341        let typed_value = Arc::new(Int64Array::from(vec![42])) as ArrayRef;
342
343        let shredded_array =
344            VariantArray::from_parts(metadata, Some(value), Some(typed_value), None);
345
346        let result = shred_variant(&shredded_array, &DataType::Int64);
347        assert!(matches!(
348            result.unwrap_err(),
349            ArrowError::InvalidArgumentError(_)
350        ));
351    }
352
353    #[test]
354    fn test_all_null_input() {
355        // Create VariantArray with no value field (all null case)
356        let metadata = BinaryViewArray::from_iter_values([&[1u8, 0u8]]); // minimal valid metadata
357        let all_null_array = VariantArray::from_parts(metadata, None, None, None);
358        let result = shred_variant(&all_null_array, &DataType::Int64).unwrap();
359
360        // Should return array with no value/typed_value fields
361        assert!(result.value_field().is_none());
362        assert!(result.typed_value_field().is_none());
363    }
364
365    #[test]
366    fn test_unsupported_list_schema() {
367        let input = VariantArray::from_iter([Variant::from(42)]);
368        let list_schema = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
369        shred_variant(&input, &list_schema).expect_err("unsupported");
370    }
371
372    #[test]
373    fn test_primitive_shredding_comprehensive() {
374        // Test mixed scenarios in a single array
375        let input = VariantArray::from_iter(vec![
376            Some(Variant::from(42i64)),   // successful shred
377            Some(Variant::from("hello")), // failed shred (string)
378            Some(Variant::from(100i64)),  // successful shred
379            None,                         // array-level null
380            Some(Variant::Null),          // variant null
381            Some(Variant::from(3i8)),     // successful shred (int8->int64 conversion)
382        ]);
383
384        let result = shred_variant(&input, &DataType::Int64).unwrap();
385
386        // Verify structure
387        let metadata_field = result.metadata_field();
388        let value_field = result.value_field().unwrap();
389        let typed_value_field = result
390            .typed_value_field()
391            .unwrap()
392            .as_any()
393            .downcast_ref::<Int64Array>()
394            .unwrap();
395
396        // Check specific outcomes for each row
397        assert_eq!(result.len(), 6);
398
399        // Row 0: 42 -> should shred successfully
400        assert!(!result.is_null(0));
401        assert!(value_field.is_null(0)); // value should be null when shredded
402        assert!(!typed_value_field.is_null(0));
403        assert_eq!(typed_value_field.value(0), 42);
404
405        // Row 1: "hello" -> should fail to shred
406        assert!(!result.is_null(1));
407        assert!(!value_field.is_null(1)); // value should contain original
408        assert!(typed_value_field.is_null(1)); // typed_value should be null
409        assert_eq!(
410            Variant::new(metadata_field.value(1), value_field.value(1)),
411            Variant::from("hello")
412        );
413
414        // Row 2: 100 -> should shred successfully
415        assert!(!result.is_null(2));
416        assert!(value_field.is_null(2));
417        assert_eq!(typed_value_field.value(2), 100);
418
419        // Row 3: array null -> should be null in result
420        assert!(result.is_null(3));
421
422        // Row 4: Variant::Null -> should not shred (it's a null variant, not an integer)
423        assert!(!result.is_null(4));
424        assert!(!value_field.is_null(4)); // should contain Variant::Null
425        assert_eq!(
426            Variant::new(metadata_field.value(4), value_field.value(4)),
427            Variant::Null
428        );
429        assert!(typed_value_field.is_null(4));
430
431        // Row 5: 3i8 -> should shred successfully (int8->int64 conversion)
432        assert!(!result.is_null(5));
433        assert!(value_field.is_null(5)); // value should be null when shredded
434        assert!(!typed_value_field.is_null(5));
435        assert_eq!(typed_value_field.value(5), 3);
436    }
437
438    #[test]
439    fn test_primitive_different_target_types() {
440        let input = VariantArray::from_iter(vec![
441            Variant::from(42i32),
442            Variant::from(3.15f64),
443            Variant::from("not_a_number"),
444        ]);
445
446        // Test Int32 target
447        let result_int32 = shred_variant(&input, &DataType::Int32).unwrap();
448        let typed_value_int32 = result_int32
449            .typed_value_field()
450            .unwrap()
451            .as_any()
452            .downcast_ref::<arrow::array::Int32Array>()
453            .unwrap();
454        assert_eq!(typed_value_int32.value(0), 42);
455        assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32
456        assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32
457
458        // Test Float64 target
459        let result_float64 = shred_variant(&input, &DataType::Float64).unwrap();
460        let typed_value_float64 = result_float64
461            .typed_value_field()
462            .unwrap()
463            .as_any()
464            .downcast_ref::<Float64Array>()
465            .unwrap();
466        assert_eq!(typed_value_float64.value(0), 42.0); // int converts to float
467        assert_eq!(typed_value_float64.value(1), 3.15);
468        assert!(typed_value_float64.is_null(2)); // string doesn't convert
469    }
470
471    #[test]
472    fn test_object_shredding_comprehensive() {
473        let mut builder = VariantArrayBuilder::new(7);
474
475        // Row 0: Fully shredded object
476        builder
477            .new_object()
478            .with_field("score", 95.5f64)
479            .with_field("age", 30i64)
480            .finish();
481
482        // Row 1: Partially shredded object (extra email field)
483        builder
484            .new_object()
485            .with_field("score", 87.2f64)
486            .with_field("age", 25i64)
487            .with_field("email", "bob@example.com")
488            .finish();
489
490        // Row 2: Missing field (no score)
491        builder.new_object().with_field("age", 35i64).finish();
492
493        // Row 3: Type mismatch (score is string, age is string)
494        builder
495            .new_object()
496            .with_field("score", "ninety-five")
497            .with_field("age", "thirty")
498            .finish();
499
500        // Row 4: Non-object
501        builder.append_variant(Variant::from("not an object"));
502
503        // Row 5: Empty object
504        builder.new_object().finish();
505
506        // Row 6: Null
507        builder.append_null();
508
509        // Row 7: Object with only "wrong" fields
510        builder.new_object().with_field("foo", 10).finish();
511
512        // Row 8: Object with one "right" and one "wrong" field
513        builder
514            .new_object()
515            .with_field("score", 66.67f64)
516            .with_field("foo", 10)
517            .finish();
518
519        let input = builder.build();
520
521        // Create target schema: struct<score: float64, age: int64>
522        // Both types are supported for shredding
523        let fields = Fields::from(vec![
524            Field::new("score", DataType::Float64, true),
525            Field::new("age", DataType::Int64, true),
526        ]);
527        let target_schema = DataType::Struct(fields);
528
529        let result = shred_variant(&input, &target_schema).unwrap();
530
531        // Verify structure
532        assert!(result.value_field().is_some());
533        assert!(result.typed_value_field().is_some());
534        assert_eq!(result.len(), 9);
535
536        let metadata = result.metadata_field();
537
538        let value = result.value_field().unwrap();
539        let typed_value = result
540            .typed_value_field()
541            .unwrap()
542            .as_any()
543            .downcast_ref::<arrow::array::StructArray>()
544            .unwrap();
545
546        // Extract score and age fields from typed_value struct
547        let score_field =
548            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("score").unwrap())
549                .unwrap();
550        let age_field =
551            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap();
552
553        let score_value = score_field
554            .value_field()
555            .unwrap()
556            .as_any()
557            .downcast_ref::<BinaryViewArray>()
558            .unwrap();
559        let score_typed_value = score_field
560            .typed_value_field()
561            .unwrap()
562            .as_any()
563            .downcast_ref::<Float64Array>()
564            .unwrap();
565        let age_value = age_field
566            .value_field()
567            .unwrap()
568            .as_any()
569            .downcast_ref::<BinaryViewArray>()
570            .unwrap();
571        let age_typed_value = age_field
572            .typed_value_field()
573            .unwrap()
574            .as_any()
575            .downcast_ref::<Int64Array>()
576            .unwrap();
577
578        // Set up exhaustive checking of all shredded columns and their nulls/values
579        struct ShreddedValue<'m, 'v, T> {
580            value: Option<Variant<'m, 'v>>,
581            typed_value: Option<T>,
582        }
583        struct ShreddedStruct<'m, 'v> {
584            score: ShreddedValue<'m, 'v, f64>,
585            age: ShreddedValue<'m, 'v, i64>,
586        }
587        fn get_value<'m, 'v>(
588            i: usize,
589            metadata: &'m BinaryViewArray,
590            value: &'v BinaryViewArray,
591        ) -> Variant<'m, 'v> {
592            Variant::new(metadata.value(i), value.value(i))
593        }
594        let expect = |i, expected_result: Option<ShreddedValue<ShreddedStruct>>| {
595            match expected_result {
596                Some(ShreddedValue {
597                    value: expected_value,
598                    typed_value: expected_typed_value,
599                }) => {
600                    assert!(result.is_valid(i));
601                    match expected_value {
602                        Some(expected_value) => {
603                            assert!(value.is_valid(i));
604                            assert_eq!(expected_value, get_value(i, metadata, value));
605                        }
606                        None => {
607                            assert!(value.is_null(i));
608                        }
609                    }
610                    match expected_typed_value {
611                        Some(ShreddedStruct {
612                            score: expected_score,
613                            age: expected_age,
614                        }) => {
615                            assert!(typed_value.is_valid(i));
616                            assert!(score_field.is_valid(i)); // non-nullable
617                            assert!(age_field.is_valid(i)); // non-nullable
618                            match expected_score.value {
619                                Some(expected_score_value) => {
620                                    assert!(score_value.is_valid(i));
621                                    assert_eq!(
622                                        expected_score_value,
623                                        get_value(i, metadata, score_value)
624                                    );
625                                }
626                                None => {
627                                    assert!(score_value.is_null(i));
628                                }
629                            }
630                            match expected_score.typed_value {
631                                Some(expected_score) => {
632                                    assert!(score_typed_value.is_valid(i));
633                                    assert_eq!(expected_score, score_typed_value.value(i));
634                                }
635                                None => {
636                                    assert!(score_typed_value.is_null(i));
637                                }
638                            }
639                            match expected_age.value {
640                                Some(expected_age_value) => {
641                                    assert!(age_value.is_valid(i));
642                                    assert_eq!(
643                                        expected_age_value,
644                                        get_value(i, metadata, age_value)
645                                    );
646                                }
647                                None => {
648                                    assert!(age_value.is_null(i));
649                                }
650                            }
651                            match expected_age.typed_value {
652                                Some(expected_age) => {
653                                    assert!(age_typed_value.is_valid(i));
654                                    assert_eq!(expected_age, age_typed_value.value(i));
655                                }
656                                None => {
657                                    assert!(age_typed_value.is_null(i));
658                                }
659                            }
660                        }
661                        None => {
662                            assert!(typed_value.is_null(i));
663                        }
664                    }
665                }
666                None => {
667                    assert!(result.is_null(i));
668                }
669            };
670        };
671
672        // Row 0: Fully shredded - both fields shred successfully
673        expect(
674            0,
675            Some(ShreddedValue {
676                value: None,
677                typed_value: Some(ShreddedStruct {
678                    score: ShreddedValue {
679                        value: None,
680                        typed_value: Some(95.5),
681                    },
682                    age: ShreddedValue {
683                        value: None,
684                        typed_value: Some(30),
685                    },
686                }),
687            }),
688        );
689
690        // Row 1: Partially shredded - value contains extra email field
691        let mut builder = VariantBuilder::new();
692        builder
693            .new_object()
694            .with_field("email", "bob@example.com")
695            .finish();
696        let (m, v) = builder.finish();
697        let expected_value = Variant::new(&m, &v);
698
699        expect(
700            1,
701            Some(ShreddedValue {
702                value: Some(expected_value),
703                typed_value: Some(ShreddedStruct {
704                    score: ShreddedValue {
705                        value: None,
706                        typed_value: Some(87.2),
707                    },
708                    age: ShreddedValue {
709                        value: None,
710                        typed_value: Some(25),
711                    },
712                }),
713            }),
714        );
715
716        // Row 2: Fully shredded -- missing score field
717        expect(
718            2,
719            Some(ShreddedValue {
720                value: None,
721                typed_value: Some(ShreddedStruct {
722                    score: ShreddedValue {
723                        value: None,
724                        typed_value: None,
725                    },
726                    age: ShreddedValue {
727                        value: None,
728                        typed_value: Some(35),
729                    },
730                }),
731            }),
732        );
733
734        // Row 3: Type mismatches - both score and age are strings
735        expect(
736            3,
737            Some(ShreddedValue {
738                value: None,
739                typed_value: Some(ShreddedStruct {
740                    score: ShreddedValue {
741                        value: Some(Variant::from("ninety-five")),
742                        typed_value: None,
743                    },
744                    age: ShreddedValue {
745                        value: Some(Variant::from("thirty")),
746                        typed_value: None,
747                    },
748                }),
749            }),
750        );
751
752        // Row 4: Non-object - falls back to value field
753        expect(
754            4,
755            Some(ShreddedValue {
756                value: Some(Variant::from("not an object")),
757                typed_value: None,
758            }),
759        );
760
761        // Row 5: Empty object
762        expect(
763            5,
764            Some(ShreddedValue {
765                value: None,
766                typed_value: Some(ShreddedStruct {
767                    score: ShreddedValue {
768                        value: None,
769                        typed_value: None,
770                    },
771                    age: ShreddedValue {
772                        value: None,
773                        typed_value: None,
774                    },
775                }),
776            }),
777        );
778
779        // Row 6: Null
780        expect(6, None);
781
782        // Helper to correctly create a variant object using a row's existing metadata
783        let object_with_foo_field = |i| {
784            use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
785            let metadata = VariantMetadata::new(metadata.value(i));
786            let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
787            let mut value_builder = ValueBuilder::new();
788            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
789            ObjectBuilder::new(state, false)
790                .with_field("foo", 10)
791                .finish();
792            (metadata, value_builder.into_inner())
793        };
794
795        // Row 7: Object with only a "wrong" field
796        let (m, v) = object_with_foo_field(7);
797        expect(
798            7,
799            Some(ShreddedValue {
800                value: Some(Variant::new_with_metadata(m, &v)),
801                typed_value: Some(ShreddedStruct {
802                    score: ShreddedValue {
803                        value: None,
804                        typed_value: None,
805                    },
806                    age: ShreddedValue {
807                        value: None,
808                        typed_value: None,
809                    },
810                }),
811            }),
812        );
813
814        // Row 8: Object with one "wrong" and one "right" field
815        let (m, v) = object_with_foo_field(8);
816        expect(
817            8,
818            Some(ShreddedValue {
819                value: Some(Variant::new_with_metadata(m, &v)),
820                typed_value: Some(ShreddedStruct {
821                    score: ShreddedValue {
822                        value: None,
823                        typed_value: Some(66.67),
824                    },
825                    age: ShreddedValue {
826                        value: None,
827                        typed_value: None,
828                    },
829                }),
830            }),
831        );
832    }
833
834    #[test]
835    fn test_object_different_schemas() {
836        // Create object with multiple fields
837        let mut builder = VariantArrayBuilder::new(1);
838        builder
839            .new_object()
840            .with_field("id", 123i32)
841            .with_field("age", 25i64)
842            .with_field("score", 95.5f64)
843            .finish();
844        let input = builder.build();
845
846        // Test with schema containing only id field
847        let schema1 = DataType::Struct(Fields::from(vec![Field::new("id", DataType::Int32, true)]));
848        let result1 = shred_variant(&input, &schema1).unwrap();
849        let value_field1 = result1.value_field().unwrap();
850        assert!(!value_field1.is_null(0)); // should contain {"age": 25, "score": 95.5}
851
852        // Test with schema containing id and age fields
853        let schema2 = DataType::Struct(Fields::from(vec![
854            Field::new("id", DataType::Int32, true),
855            Field::new("age", DataType::Int64, true),
856        ]));
857        let result2 = shred_variant(&input, &schema2).unwrap();
858        let value_field2 = result2.value_field().unwrap();
859        assert!(!value_field2.is_null(0)); // should contain {"score": 95.5}
860
861        // Test with schema containing all fields
862        let schema3 = DataType::Struct(Fields::from(vec![
863            Field::new("id", DataType::Int32, true),
864            Field::new("age", DataType::Int64, true),
865            Field::new("score", DataType::Float64, true),
866        ]));
867        let result3 = shred_variant(&input, &schema3).unwrap();
868        let value_field3 = result3.value_field().unwrap();
869        assert!(value_field3.is_null(0)); // fully shredded, no remaining fields
870    }
871
872    #[test]
873    fn test_spec_compliance() {
874        let input = VariantArray::from_iter(vec![Variant::from(42i64), Variant::from("hello")]);
875
876        let result = shred_variant(&input, &DataType::Int64).unwrap();
877
878        // Test field access by name (not position)
879        let inner_struct = result.inner();
880        assert!(inner_struct.column_by_name("metadata").is_some());
881        assert!(inner_struct.column_by_name("value").is_some());
882        assert!(inner_struct.column_by_name("typed_value").is_some());
883
884        // Test metadata preservation
885        assert_eq!(result.metadata_field().len(), input.metadata_field().len());
886        // The metadata should be the same reference (cheap clone)
887        // Note: BinaryViewArray doesn't have a .values() method, so we compare the arrays directly
888        assert_eq!(result.metadata_field().len(), input.metadata_field().len());
889
890        // Test output structure correctness
891        assert_eq!(result.len(), input.len());
892        assert!(result.value_field().is_some());
893        assert!(result.typed_value_field().is_some());
894
895        // For primitive shredding, verify that value and typed_value are never both non-null
896        // (This rule applies to primitives; for objects, both can be non-null for partial shredding)
897        let value_field = result.value_field().unwrap();
898        let typed_value_field = result
899            .typed_value_field()
900            .unwrap()
901            .as_any()
902            .downcast_ref::<Int64Array>()
903            .unwrap();
904
905        for i in 0..result.len() {
906            if !result.is_null(i) {
907                let value_is_null = value_field.is_null(i);
908                let typed_value_is_null = typed_value_field.is_null(i);
909                // For primitive shredding, at least one should be null
910                assert!(
911                    value_is_null || typed_value_is_null,
912                    "Row {}: both value and typed_value are non-null for primitive shredding",
913                    i
914                );
915            }
916        }
917    }
918}