Skip to main content

parquet_variant_compute/
unshred_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for unshredding VariantArray by folding typed_value columns back into the value column.
19
20use crate::arrow_to_variant::ListLikeArray;
21use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
22use arrow::array::{
23    Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray,
24    GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray,
25};
26use arrow::buffer::NullBuffer;
27use arrow::datatypes::{
28    ArrowPrimitiveType, DataType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type,
29    DecimalType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
30    Time64MicrosecondType, TimeUnit, TimestampMicrosecondType, TimestampNanosecondType,
31};
32use arrow::error::{ArrowError, Result};
33use arrow::temporal_conversions::time64us_to_time;
34use chrono::{DateTime, Utc};
35use indexmap::IndexMap;
36use parquet_variant::{
37    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
38    VariantDecimal16, VariantDecimalType, VariantMetadata,
39};
40use std::marker::PhantomData;
41use uuid::Uuid;
42
43/// Removes all (nested) typed_value columns from a VariantArray by converting them back to binary
44/// variant and merging the resulting values back into the value column.
45///
46/// This function efficiently converts a shredded VariantArray back to an unshredded form where all
47/// data resides in the value column.
48///
49/// # Arguments
50/// * `array` - The VariantArray to unshred
51///
52/// # Returns
53/// A new VariantArray with all data in the value column and no typed_value column
54///
55/// # Errors
56/// - If the shredded data contains spec violations (e.g., field name conflicts)
57/// - If unsupported data types are encountered in typed_value columns
58pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
59    // Check if already unshredded (optimization for common case)
60    if array.typed_value_field().is_none() && array.value_field().is_some() {
61        return Ok(array.clone());
62    }
63
64    // NOTE: None/None at top-level is technically invalid, but the shredding spec requires us to
65    // emit `Variant::Null` when a required value is missing.
66    let nulls = array.nulls();
67    let mut row_builder = UnshredVariantRowBuilder::try_new_opt(array.shredding_state().borrow())?
68        .unwrap_or_else(|| UnshredVariantRowBuilder::null(nulls));
69
70    let metadata = array.metadata_field();
71    let mut value_builder = VariantValueArrayBuilder::new(array.len());
72    for i in 0..array.len() {
73        if array.is_null(i) {
74            value_builder.append_null();
75        } else {
76            let metadata = VariantMetadata::new(metadata.value(i));
77            let mut value_builder = value_builder.builder_ext(&metadata);
78            row_builder.append_row(&mut value_builder, &metadata, i)?;
79        }
80    }
81
82    let value = value_builder.build()?;
83    Ok(VariantArray::from_parts(
84        metadata.clone(),
85        Some(value),
86        None,
87        nulls.cloned(),
88    ))
89}
90
/// Row builder for converting shredded VariantArray rows back to unshredded form.
///
/// Each variant wraps the concrete builder for one supported `typed_value` Arrow type (selected
/// in `try_new_opt`), plus two variants for columns without a `typed_value`: `ValueOnly` and
/// `Null`. Dispatch happens through `append_row`.
enum UnshredVariantRowBuilder<'a> {
    // Integer and floating point primitives
    PrimitiveInt8(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int8Type>>),
    PrimitiveInt16(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int16Type>>),
    PrimitiveInt32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int32Type>>),
    PrimitiveInt64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int64Type>>),
    PrimitiveFloat32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float32Type>>),
    PrimitiveFloat64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float64Type>>),
    // Decimals, each paired with the variant decimal type of matching width
    Decimal32(DecimalUnshredRowBuilder<'a, Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalUnshredRowBuilder<'a, Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalUnshredRowBuilder<'a, Decimal128Type, VariantDecimal16>),
    // Temporal types
    PrimitiveDate32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Date32Type>>),
    PrimitiveTime64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Time64MicrosecondType>>),
    TimestampMicrosecond(TimestampUnshredRowBuilder<'a, TimestampMicrosecondType>),
    TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>),
    // Boolean, string, binary and UUID (FixedSizeBinary) values
    PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>),
    PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
    PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
    PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
    // List-like containers; elements are unshredded recursively
    List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListUnshredVariantBuilder<'a, FixedSizeListArray>),
    // Objects; fields are unshredded recursively
    Struct(StructUnshredVariantBuilder<'a>),
    // No typed_value column: copy the value column through unchanged
    ValueOnly(ValueOnlyUnshredVariantBuilder<'a>),
    // Neither value nor typed_value column
    Null(NullUnshredVariantBuilder<'a>),
}
119
impl<'a> UnshredVariantRowBuilder<'a> {
    /// Creates an all-null row builder.
    ///
    /// `nulls` is the optional null mask of the enclosing array; the resulting builder uses it
    /// to choose between appending a NULL and appending `Variant::Null` for each row.
    fn null(nulls: Option<&'a NullBuffer>) -> Self {
        Self::Null(NullUnshredVariantBuilder::new(nulls))
    }

    /// Appends a single row at the given value index to the supplied builder.
    ///
    /// This is pure dispatch: every variant forwards the same three arguments to the
    /// `append_row` of the builder it wraps and returns that builder's result.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        metadata: &VariantMetadata,
        index: usize,
    ) -> Result<()> {
        match self {
            Self::PrimitiveInt8(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveInt16(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveInt32(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveInt64(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveFloat32(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveFloat64(b) => b.append_row(builder, metadata, index),
            Self::Decimal32(b) => b.append_row(builder, metadata, index),
            Self::Decimal64(b) => b.append_row(builder, metadata, index),
            Self::Decimal128(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveDate32(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveTime64(b) => b.append_row(builder, metadata, index),
            Self::TimestampMicrosecond(b) => b.append_row(builder, metadata, index),
            Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index),
            Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
            Self::List(b) => b.append_row(builder, metadata, index),
            Self::LargeList(b) => b.append_row(builder, metadata, index),
            Self::ListView(b) => b.append_row(builder, metadata, index),
            Self::LargeListView(b) => b.append_row(builder, metadata, index),
            Self::FixedSizeList(b) => b.append_row(builder, metadata, index),
            Self::Struct(b) => b.append_row(builder, metadata, index),
            Self::ValueOnly(b) => b.append_row(builder, metadata, index),
            Self::Null(b) => b.append_row(builder, metadata, index),
        }
    }

    /// Creates a new UnshredVariantRowBuilder from shredding state
    /// Returns None for None/None case - caller decides how to handle based on context
    ///
    /// # Returns
    /// - `Ok(Some(ValueOnly))` when only the value column is present
    /// - `Ok(None)` when neither the value nor the typed_value column is present
    /// - `Ok(Some(..))` with the builder matching the typed_value data type otherwise
    ///
    /// # Errors
    /// - `InvalidArgumentError` for decimal, time, timestamp and fixed-size binary types that
    ///   are rejected as shredding types by the match below
    /// - `NotYetImplemented` for any other data type without a matching arm
    /// - Any error raised while constructing a nested struct or list builder
    fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result<Option<Self>> {
        let value = shredding_state.value_field();
        let typed_value = shredding_state.typed_value_field();
        let Some(typed_value) = typed_value else {
            // Copy the value across directly, if present. Else caller decides what to do.
            return Ok(value.map(|v| Self::ValueOnly(ValueOnlyUnshredVariantBuilder::new(v))));
        };

        // Has typed_value -> determine type and create appropriate builder
        //
        // The helper macro wraps `typed_value`, downcast with the given `AsArray` method, in an
        // `UnshredPrimitiveRowBuilder` stored under the given enum variant.
        macro_rules! primitive_builder {
            ($enum_variant:ident, $cast_fn:ident) => {
                Self::$enum_variant(UnshredPrimitiveRowBuilder::new(
                    value,
                    typed_value.$cast_fn(),
                ))
            };
        }

        let builder = match typed_value.data_type() {
            DataType::Int8 => primitive_builder!(PrimitiveInt8, as_primitive),
            DataType::Int16 => primitive_builder!(PrimitiveInt16, as_primitive),
            DataType::Int32 => primitive_builder!(PrimitiveInt32, as_primitive),
            DataType::Int64 => primitive_builder!(PrimitiveInt64, as_primitive),
            DataType::Float32 => primitive_builder!(PrimitiveFloat32, as_primitive),
            DataType::Float64 => primitive_builder!(PrimitiveFloat64, as_primitive),
            // Decimals are accepted only when the matching variant decimal type approves the
            // precision/scale pair; the guarded arms must stay ahead of the reject arm below.
            DataType::Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => {
                Self::Decimal32(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
            }
            DataType::Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
                Self::Decimal64(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
            }
            DataType::Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => {
                Self::Decimal128(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
            }
            // Any decimal that failed its guard, and every Decimal256, is rejected.
            DataType::Decimal32(_, _)
            | DataType::Decimal64(_, _)
            | DataType::Decimal128(_, _)
            | DataType::Decimal256(_, _) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "{} is not a valid variant shredding type",
                    typed_value.data_type()
                )));
            }
            DataType::Date32 => primitive_builder!(PrimitiveDate32, as_primitive),
            // Only microsecond precision is accepted for Time64.
            DataType::Time64(TimeUnit::Microsecond) => {
                primitive_builder!(PrimitiveTime64, as_primitive)
            }
            DataType::Time64(time_unit) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "Time64({time_unit}) is not a valid variant shredding type",
                )));
            }
            // Timestamps: microsecond and nanosecond precision only. The presence of a timezone
            // is forwarded as a flag; the timezone value itself is not used here.
            DataType::Timestamp(TimeUnit::Microsecond, timezone) => Self::TimestampMicrosecond(
                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
            ),
            DataType::Timestamp(TimeUnit::Nanosecond, timezone) => Self::TimestampNanosecond(
                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
            ),
            DataType::Timestamp(time_unit, _) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "Timestamp({time_unit}) is not a valid variant shredding type",
                )));
            }
            DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean),
            DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
            DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view),
            // A 16-byte fixed-size binary is treated as a UUID; other widths are rejected.
            DataType::FixedSizeBinary(16) => {
                primitive_builder!(PrimitiveUuid, as_fixed_size_binary)
            }
            DataType::FixedSizeBinary(size) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "FixedSizeBinary({size}) is not a valid variant shredding type",
                )));
            }
            // Nested types build their child unshredders recursively and may fail.
            DataType::Struct(_) => Self::Struct(StructUnshredVariantBuilder::try_new(
                value,
                typed_value.as_struct(),
            )?),
            DataType::List(_) => Self::List(ListUnshredVariantBuilder::try_new(
                value,
                typed_value.as_list(),
            )?),
            DataType::LargeList(_) => Self::LargeList(ListUnshredVariantBuilder::try_new(
                value,
                typed_value.as_list(),
            )?),
            DataType::ListView(_) => Self::ListView(ListUnshredVariantBuilder::try_new(
                value,
                typed_value.as_list_view(),
            )?),
            DataType::LargeListView(_) => Self::LargeListView(ListUnshredVariantBuilder::try_new(
                value,
                typed_value.as_list_view(),
            )?),
            DataType::FixedSizeList(_, _) => Self::FixedSizeList(
                ListUnshredVariantBuilder::try_new(value, typed_value.as_fixed_size_list())?,
            ),
            _ => {
                return Err(ArrowError::NotYetImplemented(format!(
                    "Unshredding not yet supported for type: {}",
                    typed_value.data_type()
                )));
            }
        };
        Ok(Some(builder))
    }
}
271
272/// Builder for arrays with neither typed_value nor value (all NULL/Variant::Null)
273struct NullUnshredVariantBuilder<'a> {
274    nulls: Option<&'a NullBuffer>,
275}
276
277impl<'a> NullUnshredVariantBuilder<'a> {
278    fn new(nulls: Option<&'a NullBuffer>) -> Self {
279        Self { nulls }
280    }
281
282    fn append_row(
283        &mut self,
284        builder: &mut impl VariantBuilderExt,
285        _metadata: &VariantMetadata,
286        index: usize,
287    ) -> Result<()> {
288        if self.nulls.is_some_and(|nulls| nulls.is_null(index)) {
289            builder.append_null();
290        } else {
291            builder.append_value(Variant::Null);
292        }
293        Ok(())
294    }
295}
296
297/// Builder for arrays that only have value column (already unshredded)
298struct ValueOnlyUnshredVariantBuilder<'a> {
299    value: &'a arrow::array::BinaryViewArray,
300}
301
302impl<'a> ValueOnlyUnshredVariantBuilder<'a> {
303    fn new(value: &'a BinaryViewArray) -> Self {
304        Self { value }
305    }
306
307    fn append_row(
308        &mut self,
309        builder: &mut impl VariantBuilderExt,
310        metadata: &VariantMetadata,
311        index: usize,
312    ) -> Result<()> {
313        if self.value.is_null(index) {
314            builder.append_null();
315        } else {
316            let variant = Variant::new_with_metadata(metadata.clone(), self.value.value(index));
317            builder.append_value(variant);
318        }
319        Ok(())
320    }
321}
322
/// Extension trait that directly adds row builder support for arrays that correspond to primitive
/// variant types.
///
/// Implementations in this file are generated by `impl_append_to_variant_builder!` and are
/// consumed by `UnshredPrimitiveRowBuilder`.
trait AppendToVariantBuilder: Array {
    /// Appends the element at `index` to `builder` as a variant value.
    ///
    /// The generated implementations read `self.value(index)` without consulting the null mask,
    /// so the caller is expected to have checked validity first. An error is returned when the
    /// stored value cannot be converted (e.g. an out-of-range date or time).
    fn append_to_variant_builder(
        &self,
        builder: &mut impl VariantBuilderExt,
        index: usize,
    ) -> Result<()>;
}
332
/// Macro that handles the unshredded case (typed_value is missing or NULL) and returns early if
/// handled.  If not handled (shredded case), validates and returns the extracted value.
///
/// Arguments: `$self` must expose `value: Option<&BinaryViewArray>` and a `typed_value` array;
/// `$builder`, `$metadata` and `$index` are the `append_row` arguments of the caller;
/// `$partial_shredding` states whether value and typed_value may both be non-NULL for a row.
///
/// NOTE: This expands to `return` statements, so it may only be used inside a function that
/// returns `Result<()>`. The macro evaluates to `Option<Variant>`: the row's residual value,
/// which can only be `Some` when `$partial_shredding` is true.
macro_rules! handle_unshredded_case {
    ($self:expr, $builder:expr, $metadata:expr, $index:expr, $partial_shredding:expr) => {{
        // `value` is Some only if the value column exists AND is valid at this row
        let value = $self.value.as_ref().filter(|v| v.is_valid($index));
        let value = value.map(|v| Variant::new_with_metadata($metadata.clone(), v.value($index)));

        // If typed_value is null, handle unshredded case and return early
        if $self.typed_value.is_null($index) {
            match value {
                Some(value) => $builder.append_value(value),
                // Neither column has data for this row
                None => $builder.append_null(),
            }
            return Ok(());
        }

        // Only partial shredding allows value and typed_value to both be non-NULL
        if !$partial_shredding && value.is_some() {
            return Err(ArrowError::InvalidArgumentError(
                "Invalid shredded variant: both value and typed_value are non-null".to_string(),
            ));
        }

        // Return the extracted value for the partial shredded case
        value
    }};
}
360
/// Generic unshred builder that works with any Array implementing AppendToVariantBuilder
struct UnshredPrimitiveRowBuilder<'a, T> {
    // Optional value column holding rows that were not shredded into `typed_value`
    value: Option<&'a BinaryViewArray>,
    // Strongly typed column holding the shredded rows
    typed_value: &'a T,
}

impl<'a, T: AppendToVariantBuilder> UnshredPrimitiveRowBuilder<'a, T> {
    /// Pairs the optional value column with its typed_value column.
    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a T) -> Self {
        Self { value, typed_value }
    }

    /// Appends row `index` to `builder`.
    ///
    /// When typed_value is NULL at this row, the macro appends the value column's variant (or
    /// NULL if that is missing too) and returns early. When both columns are non-NULL, the
    /// macro returns an `InvalidArgumentError`, because partial shredding is disabled here.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        metadata: &VariantMetadata,
        index: usize,
    ) -> Result<()> {
        handle_unshredded_case!(self, builder, metadata, index, false);

        // If we get here, typed_value is valid and value is NULL
        self.typed_value.append_to_variant_builder(builder, index)
    }
}
384
// Macro to generate AppendToVariantBuilder implementations with optional value transformation
//
// Usage:
//   impl_append_to_variant_builder!(ArrayType);
//   impl_append_to_variant_builder!(ArrayType, |v| expression_using_v);
//
// Without a transform, `self.value(index)` is appended as-is. With a transform, the raw value is
// bound to the given identifier and the expression's result is appended instead. The expression
// is expanded inside a function returning `Result<()>`, so it may use `?` to report a failure.
macro_rules! impl_append_to_variant_builder {
    ($array_type:ty $(, |$v:ident| $transform:expr)? ) => {
        impl AppendToVariantBuilder for $array_type {
            fn append_to_variant_builder(
                &self,
                builder: &mut impl VariantBuilderExt,
                index: usize,
            ) -> Result<()> {
                let value = self.value(index);
                // Optional transform: rebind the raw value, then shadow `value` with the result
                $(
                    let $v = value;
                    let value = $transform;
                )?
                builder.append_value(value);
                Ok(())
            }
        }
    };
}
405
// Array types whose element value is appended to the variant builder without any transformation
impl_append_to_variant_builder!(BooleanArray);
impl_append_to_variant_builder!(StringArray);
impl_append_to_variant_builder!(BinaryViewArray);
impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int16Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int32Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int64Type>);
impl_append_to_variant_builder!(PrimitiveArray<Float32Type>);
impl_append_to_variant_builder!(PrimitiveArray<Float64Type>);

// Date32: converted with `Date32Type::to_naive_date_opt`; a value it rejects becomes an
// InvalidArgumentError
impl_append_to_variant_builder!(PrimitiveArray<Date32Type>, |days_since_epoch| {
    Date32Type::to_naive_date_opt(days_since_epoch).ok_or_else(|| {
        ArrowError::InvalidArgumentError(format!("Invalid Date32 value: {days_since_epoch}"))
    })?
});

// Time64 (microseconds): converted with `time64us_to_time`; a value it rejects becomes an
// InvalidArgumentError
impl_append_to_variant_builder!(
    PrimitiveArray<Time64MicrosecondType>,
    |micros_since_midnight| {
        time64us_to_time(micros_since_midnight).ok_or_else(|| {
            ArrowError::InvalidArgumentError(format!(
                "Invalid Time64 microsecond value: {micros_since_midnight}"
            ))
        })?
    }
);
432
433// UUID from FixedSizeBinary(16)
434// NOTE: FixedSizeBinaryArray guarantees the byte length, so we can safely unwrap
435impl_append_to_variant_builder!(FixedSizeBinaryArray, |bytes| {
436    Uuid::from_slice(bytes).unwrap()
437});
438
439/// Trait for timestamp types to handle conversion to `DateTime<Utc>`
440trait TimestampType: ArrowPrimitiveType<Native = i64> {
441    fn to_datetime_utc(value: i64) -> Result<DateTime<Utc>>;
442}
443
444impl TimestampType for TimestampMicrosecondType {
445    fn to_datetime_utc(micros: i64) -> Result<DateTime<Utc>> {
446        DateTime::from_timestamp_micros(micros).ok_or_else(|| {
447            ArrowError::InvalidArgumentError(format!(
448                "Invalid timestamp microsecond value: {micros}"
449            ))
450        })
451    }
452}
453
454impl TimestampType for TimestampNanosecondType {
455    fn to_datetime_utc(nanos: i64) -> Result<DateTime<Utc>> {
456        Ok(DateTime::from_timestamp_nanos(nanos))
457    }
458}
459
460/// Generic builder for timestamp types that handles timezone-aware conversion
461struct TimestampUnshredRowBuilder<'a, T: TimestampType> {
462    value: Option<&'a BinaryViewArray>,
463    typed_value: &'a PrimitiveArray<T>,
464    has_timezone: bool,
465}
466
467impl<'a, T: TimestampType> TimestampUnshredRowBuilder<'a, T> {
468    fn new(
469        value: Option<&'a BinaryViewArray>,
470        typed_value: &'a dyn Array,
471        has_timezone: bool,
472    ) -> Self {
473        Self {
474            value,
475            typed_value: typed_value.as_primitive(),
476            has_timezone,
477        }
478    }
479
480    fn append_row(
481        &mut self,
482        builder: &mut impl VariantBuilderExt,
483        metadata: &VariantMetadata,
484        index: usize,
485    ) -> Result<()> {
486        handle_unshredded_case!(self, builder, metadata, index, false);
487
488        // If we get here, typed_value is valid and value is NULL
489        let timestamp_value = self.typed_value.value(index);
490        let dt = T::to_datetime_utc(timestamp_value)?;
491        if self.has_timezone {
492            builder.append_value(dt);
493        } else {
494            builder.append_value(dt.naive_utc());
495        }
496        Ok(())
497    }
498}
499
500/// Generic builder for decimal unshredding
501struct DecimalUnshredRowBuilder<'a, A: DecimalType, V>
502where
503    V: VariantDecimalType<Native = A::Native>,
504{
505    value: Option<&'a BinaryViewArray>,
506    typed_value: &'a PrimitiveArray<A>,
507    scale: i8,
508    _phantom: PhantomData<V>,
509}
510
511impl<'a, A: DecimalType, V> DecimalUnshredRowBuilder<'a, A, V>
512where
513    V: VariantDecimalType<Native = A::Native>,
514{
515    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a dyn Array, scale: i8) -> Self {
516        Self {
517            value,
518            typed_value: typed_value.as_primitive(),
519            scale,
520            _phantom: PhantomData,
521        }
522    }
523
524    fn append_row(
525        &mut self,
526        builder: &mut impl VariantBuilderExt,
527        metadata: &VariantMetadata,
528        index: usize,
529    ) -> Result<()> {
530        handle_unshredded_case!(self, builder, metadata, index, false);
531
532        let raw = self.typed_value.value(index);
533        let variant = V::try_new_with_signed_scale(raw, self.scale)?;
534        builder.append_value(variant);
535        Ok(())
536    }
537}
538
539/// Builder for unshredding struct/object types with nested fields
540struct StructUnshredVariantBuilder<'a> {
541    value: Option<&'a arrow::array::BinaryViewArray>,
542    typed_value: &'a arrow::array::StructArray,
543    field_unshredders: IndexMap<&'a str, Option<UnshredVariantRowBuilder<'a>>>,
544}
545
546impl<'a> StructUnshredVariantBuilder<'a> {
547    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result<Self> {
548        // Create unshredders for each field in constructor
549        let mut field_unshredders = IndexMap::new();
550        for (field, field_array) in typed_value.fields().iter().zip(typed_value.columns()) {
551            // Factory returns None for None/None case -- these are missing fields we should skip
552            let Some(field_array) = field_array.as_struct_opt() else {
553                return Err(ArrowError::InvalidArgumentError(format!(
554                    "Invalid shredded variant object field: expected Struct, got {}",
555                    field_array.data_type()
556                )));
557            };
558            let field_unshredder = UnshredVariantRowBuilder::try_new_opt(field_array.try_into()?)?;
559            field_unshredders.insert(field.name().as_ref(), field_unshredder);
560        }
561
562        Ok(Self {
563            value,
564            typed_value,
565            field_unshredders,
566        })
567    }
568
569    fn append_row(
570        &mut self,
571        builder: &mut impl VariantBuilderExt,
572        metadata: &VariantMetadata,
573        index: usize,
574    ) -> Result<()> {
575        let value = handle_unshredded_case!(self, builder, metadata, index, true);
576
577        // If we get here, typed_value is valid and value may or may not be valid
578        let mut object_builder = builder.try_new_object()?;
579
580        // Process typed fields (skip empty builders that indicate missing fields)
581        for (field_name, field_unshredder_opt) in &mut self.field_unshredders {
582            if let Some(field_unshredder) = field_unshredder_opt {
583                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut object_builder);
584                field_unshredder.append_row(&mut field_builder, metadata, index)?;
585            }
586        }
587
588        // Process any unshredded fields (partial shredding)
589        if let Some(value) = value {
590            let Variant::Object(object) = value else {
591                return Err(ArrowError::InvalidArgumentError(
592                    "Expected object in value field for partially shredded struct".to_string(),
593                ));
594            };
595
596            for (field_name, field_value) in object.iter() {
597                if self.field_unshredders.contains_key(field_name) {
598                    return Err(ArrowError::InvalidArgumentError(format!(
599                        "Field '{field_name}' appears in both typed_value and value",
600                    )));
601                }
602                object_builder.insert_bytes(field_name, field_value);
603            }
604        }
605
606        object_builder.finish();
607        Ok(())
608    }
609}
610
611/// Builder for unshredding list/array types with recursive element processing
612struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
613    value: Option<&'a BinaryViewArray>,
614    typed_value: &'a L,
615    element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
616}
617
618impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
619    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result<Self> {
620        // Create a recursive unshredder for the list elements
621        // The element type comes from the values array of the list
622        let element_values = typed_value.values();
623
624        // For shredded lists, each element would be a ShreddedVariantFieldArray (struct)
625        // Extract value/typed_value from the element struct
626        let Some(element_values) = element_values.as_struct_opt() else {
627            return Err(ArrowError::InvalidArgumentError(format!(
628                "Invalid shredded variant array element: expected Struct, got {}",
629                element_values.data_type()
630            )));
631        };
632
633        // Create recursive unshredder for elements
634        //
635        // NOTE: A None/None array element is technically invalid, but the shredding spec
636        // requires us to emit `Variant::Null` when a required value is missing.
637        let element_unshredder = UnshredVariantRowBuilder::try_new_opt(element_values.try_into()?)?
638            .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
639
640        Ok(Self {
641            value,
642            typed_value,
643            element_unshredder: Box::new(element_unshredder),
644        })
645    }
646
647    fn append_row(
648        &mut self,
649        builder: &mut impl VariantBuilderExt,
650        metadata: &VariantMetadata,
651        index: usize,
652    ) -> Result<()> {
653        handle_unshredded_case!(self, builder, metadata, index, false);
654
655        // If we get here, typed_value is valid and value is NULL -- process the list elements
656        let mut list_builder = builder.try_new_list()?;
657        for element_index in self.typed_value.element_range(index) {
658            self.element_unshredder
659                .append_row(&mut list_builder, metadata, element_index)?;
660        }
661
662        list_builder.finish();
663        Ok(())
664    }
665}
666
667// TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice?
668// Or do we also need targeted stand-alone unit tests for full coverage?