Skip to main content

parquet_variant_compute/
unshred_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for unshredding VariantArray by folding typed_value columns back into the value column.
19
20use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
21use arrow::array::{
22    Array, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeBinaryArray,
23    FixedSizeListArray, GenericListArray, GenericListViewArray, LargeBinaryArray, LargeStringArray,
24    ListLikeArray, PrimitiveArray, StringArray, StringViewArray, StructArray,
25};
26use arrow::buffer::NullBuffer;
27use arrow::datatypes::{
28    ArrowPrimitiveType, DataType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type,
29    DecimalType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
30    Time64MicrosecondType, TimeUnit, TimestampMicrosecondType, TimestampNanosecondType,
31};
32use arrow::error::{ArrowError, Result};
33use arrow::temporal_conversions::time64us_to_time;
34use chrono::{DateTime, Utc};
35use indexmap::IndexMap;
36use parquet_variant::{
37    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
38    VariantDecimal16, VariantDecimalType, VariantMetadata,
39};
40use std::marker::PhantomData;
41use uuid::Uuid;
42
43/// Removes all (nested) typed_value columns from a VariantArray by converting them back to binary
44/// variant and merging the resulting values back into the value column.
45///
46/// This function efficiently converts a shredded VariantArray back to an unshredded form where all
47/// data resides in the value column.
48///
49/// # Arguments
50/// * `array` - The VariantArray to unshred
51///
52/// # Returns
53/// A new VariantArray with all data in the value column and no typed_value column
54///
55/// # Errors
56/// - If the shredded data contains spec violations (e.g., field name conflicts)
57/// - If unsupported data types are encountered in typed_value columns
58pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
59    // Check if already unshredded (optimization for common case)
60    if array.typed_value_field().is_none() && array.value_field().is_some() {
61        return Ok(array.clone());
62    }
63
64    // NOTE: None/None at top-level is technically invalid, but the shredding spec requires us to
65    // emit `Variant::Null` when a required value is missing.
66    let nulls = array.nulls();
67    let mut row_builder = UnshredVariantRowBuilder::try_new_opt(array.shredding_state().borrow())?
68        .unwrap_or_else(|| UnshredVariantRowBuilder::null(nulls));
69
70    let metadata = array.metadata_field();
71    let mut value_builder = VariantValueArrayBuilder::new(array.len());
72    for i in 0..array.len() {
73        if array.is_null(i) {
74            value_builder.append_null();
75        } else {
76            let metadata = VariantMetadata::new(metadata.value(i));
77            let mut value_builder = value_builder.builder_ext(&metadata);
78            row_builder.append_row(&mut value_builder, &metadata, i)?;
79        }
80    }
81
82    let value = value_builder.build()?;
83    Ok(VariantArray::from_parts(
84        metadata.clone(),
85        Some(value),
86        None,
87        nulls.cloned(),
88    ))
89}
90
91/// Row builder for converting shredded VariantArray rows back to unshredded form
92enum UnshredVariantRowBuilder<'a> {
93    PrimitiveInt8(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int8Type>>),
94    PrimitiveInt16(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int16Type>>),
95    PrimitiveInt32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int32Type>>),
96    PrimitiveInt64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int64Type>>),
97    PrimitiveFloat32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float32Type>>),
98    PrimitiveFloat64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float64Type>>),
99    Decimal32(DecimalUnshredRowBuilder<'a, Decimal32Type, VariantDecimal4>),
100    Decimal64(DecimalUnshredRowBuilder<'a, Decimal64Type, VariantDecimal8>),
101    Decimal128(DecimalUnshredRowBuilder<'a, Decimal128Type, VariantDecimal16>),
102    PrimitiveDate32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Date32Type>>),
103    PrimitiveTime64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Time64MicrosecondType>>),
104    TimestampMicrosecond(TimestampUnshredRowBuilder<'a, TimestampMicrosecondType>),
105    TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>),
106    PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>),
107    PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
108    PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>),
109    PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>),
110    PrimitiveBinary(UnshredPrimitiveRowBuilder<'a, BinaryArray>),
111    PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
112    PrimitiveLargeBinary(UnshredPrimitiveRowBuilder<'a, LargeBinaryArray>),
113    PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
114    List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>),
115    LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>),
116    ListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i32>>),
117    LargeListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i64>>),
118    FixedSizeList(ListUnshredVariantBuilder<'a, FixedSizeListArray>),
119    Struct(StructUnshredVariantBuilder<'a>),
120    ValueOnly(ValueOnlyUnshredVariantBuilder<'a>),
121    Null(NullUnshredVariantBuilder<'a>),
122}
123
124impl<'a> UnshredVariantRowBuilder<'a> {
125    /// Creates an all-null row builder.
126    fn null(nulls: Option<&'a NullBuffer>) -> Self {
127        Self::Null(NullUnshredVariantBuilder::new(nulls))
128    }
129
130    /// Appends a single row at the given value index to the supplied builder.
131    fn append_row(
132        &mut self,
133        builder: &mut impl VariantBuilderExt,
134        metadata: &VariantMetadata,
135        index: usize,
136    ) -> Result<()> {
137        match self {
138            Self::PrimitiveInt8(b) => b.append_row(builder, metadata, index),
139            Self::PrimitiveInt16(b) => b.append_row(builder, metadata, index),
140            Self::PrimitiveInt32(b) => b.append_row(builder, metadata, index),
141            Self::PrimitiveInt64(b) => b.append_row(builder, metadata, index),
142            Self::PrimitiveFloat32(b) => b.append_row(builder, metadata, index),
143            Self::PrimitiveFloat64(b) => b.append_row(builder, metadata, index),
144            Self::Decimal32(b) => b.append_row(builder, metadata, index),
145            Self::Decimal64(b) => b.append_row(builder, metadata, index),
146            Self::Decimal128(b) => b.append_row(builder, metadata, index),
147            Self::PrimitiveDate32(b) => b.append_row(builder, metadata, index),
148            Self::PrimitiveTime64(b) => b.append_row(builder, metadata, index),
149            Self::TimestampMicrosecond(b) => b.append_row(builder, metadata, index),
150            Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index),
151            Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index),
152            Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
153            Self::PrimitiveStringView(b) => b.append_row(builder, metadata, index),
154            Self::PrimitiveLargeString(b) => b.append_row(builder, metadata, index),
155            Self::PrimitiveBinary(b) => b.append_row(builder, metadata, index),
156            Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index),
157            Self::PrimitiveLargeBinary(b) => b.append_row(builder, metadata, index),
158            Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
159            Self::List(b) => b.append_row(builder, metadata, index),
160            Self::LargeList(b) => b.append_row(builder, metadata, index),
161            Self::ListView(b) => b.append_row(builder, metadata, index),
162            Self::LargeListView(b) => b.append_row(builder, metadata, index),
163            Self::FixedSizeList(b) => b.append_row(builder, metadata, index),
164            Self::Struct(b) => b.append_row(builder, metadata, index),
165            Self::ValueOnly(b) => b.append_row(builder, metadata, index),
166            Self::Null(b) => b.append_row(builder, metadata, index),
167        }
168    }
169
170    /// Creates a new UnshredVariantRowBuilder from shredding state
171    /// Returns None for None/None case - caller decides how to handle based on context
172    fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result<Option<Self>> {
173        let value = shredding_state.value_field();
174        let typed_value = shredding_state.typed_value_field();
175        let Some(typed_value) = typed_value else {
176            // Copy the value across directly, if present. Else caller decides what to do.
177            return Ok(value.map(|v| Self::ValueOnly(ValueOnlyUnshredVariantBuilder::new(v))));
178        };
179
180        // Has typed_value -> determine type and create appropriate builder
181        macro_rules! primitive_builder {
182            ($enum_variant:ident, $cast_fn:ident) => {
183                Self::$enum_variant(UnshredPrimitiveRowBuilder::new(
184                    value,
185                    typed_value.$cast_fn(),
186                ))
187            };
188        }
189
190        let builder = match typed_value.data_type() {
191            DataType::Int8 => primitive_builder!(PrimitiveInt8, as_primitive),
192            DataType::Int16 => primitive_builder!(PrimitiveInt16, as_primitive),
193            DataType::Int32 => primitive_builder!(PrimitiveInt32, as_primitive),
194            DataType::Int64 => primitive_builder!(PrimitiveInt64, as_primitive),
195            DataType::Float32 => primitive_builder!(PrimitiveFloat32, as_primitive),
196            DataType::Float64 => primitive_builder!(PrimitiveFloat64, as_primitive),
197            DataType::Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => {
198                Self::Decimal32(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
199            }
200            DataType::Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
201                Self::Decimal64(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
202            }
203            DataType::Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => {
204                Self::Decimal128(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
205            }
206            DataType::Decimal32(_, _)
207            | DataType::Decimal64(_, _)
208            | DataType::Decimal128(_, _)
209            | DataType::Decimal256(_, _) => {
210                return Err(ArrowError::InvalidArgumentError(format!(
211                    "{} is not a valid variant shredding type",
212                    typed_value.data_type()
213                )));
214            }
215            DataType::Date32 => primitive_builder!(PrimitiveDate32, as_primitive),
216            DataType::Time64(TimeUnit::Microsecond) => {
217                primitive_builder!(PrimitiveTime64, as_primitive)
218            }
219            DataType::Time64(time_unit) => {
220                return Err(ArrowError::InvalidArgumentError(format!(
221                    "Time64({time_unit}) is not a valid variant shredding type",
222                )));
223            }
224            DataType::Timestamp(TimeUnit::Microsecond, timezone) => Self::TimestampMicrosecond(
225                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
226            ),
227            DataType::Timestamp(TimeUnit::Nanosecond, timezone) => Self::TimestampNanosecond(
228                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
229            ),
230            DataType::Timestamp(time_unit, _) => {
231                return Err(ArrowError::InvalidArgumentError(format!(
232                    "Timestamp({time_unit}) is not a valid variant shredding type",
233                )));
234            }
235            DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean),
236            DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
237            DataType::Utf8View => primitive_builder!(PrimitiveStringView, as_string_view),
238            DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string),
239            DataType::Binary => primitive_builder!(PrimitiveBinary, as_binary),
240            DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view),
241            DataType::LargeBinary => primitive_builder!(PrimitiveLargeBinary, as_binary),
242            DataType::FixedSizeBinary(16) => {
243                primitive_builder!(PrimitiveUuid, as_fixed_size_binary)
244            }
245            DataType::FixedSizeBinary(size) => {
246                return Err(ArrowError::InvalidArgumentError(format!(
247                    "FixedSizeBinary({size}) is not a valid variant shredding type",
248                )));
249            }
250            DataType::Struct(_) => Self::Struct(StructUnshredVariantBuilder::try_new(
251                value,
252                typed_value.as_struct(),
253            )?),
254            DataType::List(_) => Self::List(ListUnshredVariantBuilder::try_new(
255                value,
256                typed_value.as_list(),
257            )?),
258            DataType::LargeList(_) => Self::LargeList(ListUnshredVariantBuilder::try_new(
259                value,
260                typed_value.as_list(),
261            )?),
262            DataType::ListView(_) => Self::ListView(ListUnshredVariantBuilder::try_new(
263                value,
264                typed_value.as_list_view(),
265            )?),
266            DataType::LargeListView(_) => Self::LargeListView(ListUnshredVariantBuilder::try_new(
267                value,
268                typed_value.as_list_view(),
269            )?),
270            DataType::FixedSizeList(_, _) => Self::FixedSizeList(
271                ListUnshredVariantBuilder::try_new(value, typed_value.as_fixed_size_list())?,
272            ),
273            _ => {
274                return Err(ArrowError::NotYetImplemented(format!(
275                    "Unshredding not yet supported for type: {}",
276                    typed_value.data_type()
277                )));
278            }
279        };
280        Ok(Some(builder))
281    }
282}
283
284/// Builder for arrays with neither typed_value nor value (all NULL/Variant::Null)
285struct NullUnshredVariantBuilder<'a> {
286    nulls: Option<&'a NullBuffer>,
287}
288
289impl<'a> NullUnshredVariantBuilder<'a> {
290    fn new(nulls: Option<&'a NullBuffer>) -> Self {
291        Self { nulls }
292    }
293
294    fn append_row(
295        &mut self,
296        builder: &mut impl VariantBuilderExt,
297        _metadata: &VariantMetadata,
298        index: usize,
299    ) -> Result<()> {
300        if self.nulls.is_some_and(|nulls| nulls.is_null(index)) {
301            builder.append_null();
302        } else {
303            builder.append_value(Variant::Null);
304        }
305        Ok(())
306    }
307}
308
309/// Builder for arrays that only have value column (already unshredded)
310struct ValueOnlyUnshredVariantBuilder<'a> {
311    value: &'a arrow::array::BinaryViewArray,
312}
313
314impl<'a> ValueOnlyUnshredVariantBuilder<'a> {
315    fn new(value: &'a BinaryViewArray) -> Self {
316        Self { value }
317    }
318
319    fn append_row(
320        &mut self,
321        builder: &mut impl VariantBuilderExt,
322        metadata: &VariantMetadata,
323        index: usize,
324    ) -> Result<()> {
325        if self.value.is_null(index) {
326            builder.append_null();
327        } else {
328            let variant = Variant::new_with_metadata(metadata.clone(), self.value.value(index));
329            builder.append_value(variant);
330        }
331        Ok(())
332    }
333}
334
335/// Extension trait that directly adds row builder support for arrays that correspond to primitive
336/// variant types.
337trait AppendToVariantBuilder: Array {
338    fn append_to_variant_builder(
339        &self,
340        builder: &mut impl VariantBuilderExt,
341        index: usize,
342    ) -> Result<()>;
343}
344
/// Shared prologue of every `append_row` that has a typed_value column.
///
/// Handles the unshredded case (typed_value is NULL at this row) by appending the row's value,
/// or NULL if that is missing too, and returning `Ok(())` from the *enclosing function*.
/// Otherwise the row is shredded: the macro rejects a non-NULL value unless
/// `$partial_shredding` is true, and evaluates to the row's value as an `Option<Variant>` for
/// the caller to merge.
macro_rules! handle_unshredded_case {
    ($self:expr, $builder:expr, $metadata:expr, $index:expr, $partial_shredding:expr) => {{
        // Decode the value column at this row, if the column exists and is non-NULL here.
        let unshredded = match $self.value {
            Some(column) if column.is_valid($index) => Some(Variant::new_with_metadata(
                $metadata.clone(),
                column.value($index),
            )),
            _ => None,
        };

        // typed_value is NULL: the row is fully described by `value` (or is missing entirely).
        if $self.typed_value.is_null($index) {
            if let Some(variant) = unshredded {
                $builder.append_value(variant);
            } else {
                $builder.append_null();
            }
            return Ok(());
        }

        // Only partial shredding allows value and typed_value to both be non-NULL
        if unshredded.is_some() && !$partial_shredding {
            return Err(ArrowError::InvalidArgumentError(
                "Invalid shredded variant: both value and typed_value are non-null".to_string(),
            ));
        }

        // Hand the (possibly absent) value back for the partially shredded case
        unshredded
    }};
}
372
373/// Generic unshred builder that works with any Array implementing AppendToVariantBuilder
374struct UnshredPrimitiveRowBuilder<'a, T> {
375    value: Option<&'a BinaryViewArray>,
376    typed_value: &'a T,
377}
378
379impl<'a, T: AppendToVariantBuilder> UnshredPrimitiveRowBuilder<'a, T> {
380    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a T) -> Self {
381        Self { value, typed_value }
382    }
383
384    fn append_row(
385        &mut self,
386        builder: &mut impl VariantBuilderExt,
387        metadata: &VariantMetadata,
388        index: usize,
389    ) -> Result<()> {
390        handle_unshredded_case!(self, builder, metadata, index, false);
391
392        // If we get here, typed_value is valid and value is NULL
393        self.typed_value.append_to_variant_builder(builder, index)
394    }
395}
396
// Generates an `AppendToVariantBuilder` impl for an array type.
//
// The optional `|name| expr` argument converts the raw array element (bound to `name`) into the
// value that is actually appended. The expression runs inside a function returning `Result`, so
// it may use `?` to reject elements that cannot be converted.
macro_rules! impl_append_to_variant_builder {
    ($array_type:ty $(, |$v:ident| $transform:expr)? ) => {
        impl AppendToVariantBuilder for $array_type {
            fn append_to_variant_builder(
                &self,
                builder: &mut impl VariantBuilderExt,
                index: usize,
            ) -> Result<()> {
                let element = self.value(index);
                $(
                    // Rebind under the caller-chosen name, then shadow with the converted value.
                    let $v = element;
                    let element = $transform;
                )?
                builder.append_value(element);
                Ok(())
            }
        }
    };
}
417
418impl_append_to_variant_builder!(BooleanArray);
419impl_append_to_variant_builder!(StringArray);
420impl_append_to_variant_builder!(StringViewArray);
421impl_append_to_variant_builder!(LargeStringArray);
422impl_append_to_variant_builder!(BinaryArray);
423impl_append_to_variant_builder!(BinaryViewArray);
424impl_append_to_variant_builder!(LargeBinaryArray);
425impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
426impl_append_to_variant_builder!(PrimitiveArray<Int16Type>);
427impl_append_to_variant_builder!(PrimitiveArray<Int32Type>);
428impl_append_to_variant_builder!(PrimitiveArray<Int64Type>);
429impl_append_to_variant_builder!(PrimitiveArray<Float32Type>);
430impl_append_to_variant_builder!(PrimitiveArray<Float64Type>);
431
432impl_append_to_variant_builder!(PrimitiveArray<Date32Type>, |days_since_epoch| {
433    Date32Type::to_naive_date_opt(days_since_epoch).ok_or_else(|| {
434        ArrowError::InvalidArgumentError(format!("Invalid Date32 value: {days_since_epoch}"))
435    })?
436});
437
438impl_append_to_variant_builder!(
439    PrimitiveArray<Time64MicrosecondType>,
440    |micros_since_midnight| {
441        time64us_to_time(micros_since_midnight).ok_or_else(|| {
442            ArrowError::InvalidArgumentError(format!(
443                "Invalid Time64 microsecond value: {micros_since_midnight}"
444            ))
445        })?
446    }
447);
448
449// UUID from FixedSizeBinary(16)
450// NOTE: FixedSizeBinaryArray guarantees the byte length, so we can safely unwrap
451impl_append_to_variant_builder!(FixedSizeBinaryArray, |bytes| {
452    Uuid::from_slice(bytes).unwrap()
453});
454
455/// Trait for timestamp types to handle conversion to `DateTime<Utc>`
456trait TimestampType: ArrowPrimitiveType<Native = i64> {
457    fn to_datetime_utc(value: i64) -> Result<DateTime<Utc>>;
458}
459
460impl TimestampType for TimestampMicrosecondType {
461    fn to_datetime_utc(micros: i64) -> Result<DateTime<Utc>> {
462        DateTime::from_timestamp_micros(micros).ok_or_else(|| {
463            ArrowError::InvalidArgumentError(format!(
464                "Invalid timestamp microsecond value: {micros}"
465            ))
466        })
467    }
468}
469
470impl TimestampType for TimestampNanosecondType {
471    fn to_datetime_utc(nanos: i64) -> Result<DateTime<Utc>> {
472        Ok(DateTime::from_timestamp_nanos(nanos))
473    }
474}
475
476/// Generic builder for timestamp types that handles timezone-aware conversion
477struct TimestampUnshredRowBuilder<'a, T: TimestampType> {
478    value: Option<&'a BinaryViewArray>,
479    typed_value: &'a PrimitiveArray<T>,
480    has_timezone: bool,
481}
482
483impl<'a, T: TimestampType> TimestampUnshredRowBuilder<'a, T> {
484    fn new(
485        value: Option<&'a BinaryViewArray>,
486        typed_value: &'a dyn Array,
487        has_timezone: bool,
488    ) -> Self {
489        Self {
490            value,
491            typed_value: typed_value.as_primitive(),
492            has_timezone,
493        }
494    }
495
496    fn append_row(
497        &mut self,
498        builder: &mut impl VariantBuilderExt,
499        metadata: &VariantMetadata,
500        index: usize,
501    ) -> Result<()> {
502        handle_unshredded_case!(self, builder, metadata, index, false);
503
504        // If we get here, typed_value is valid and value is NULL
505        let timestamp_value = self.typed_value.value(index);
506        let dt = T::to_datetime_utc(timestamp_value)?;
507        if self.has_timezone {
508            builder.append_value(dt);
509        } else {
510            builder.append_value(dt.naive_utc());
511        }
512        Ok(())
513    }
514}
515
516/// Generic builder for decimal unshredding
517struct DecimalUnshredRowBuilder<'a, A: DecimalType, V>
518where
519    V: VariantDecimalType<Native = A::Native>,
520{
521    value: Option<&'a BinaryViewArray>,
522    typed_value: &'a PrimitiveArray<A>,
523    scale: i8,
524    _phantom: PhantomData<V>,
525}
526
527impl<'a, A: DecimalType, V> DecimalUnshredRowBuilder<'a, A, V>
528where
529    V: VariantDecimalType<Native = A::Native>,
530{
531    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a dyn Array, scale: i8) -> Self {
532        Self {
533            value,
534            typed_value: typed_value.as_primitive(),
535            scale,
536            _phantom: PhantomData,
537        }
538    }
539
540    fn append_row(
541        &mut self,
542        builder: &mut impl VariantBuilderExt,
543        metadata: &VariantMetadata,
544        index: usize,
545    ) -> Result<()> {
546        handle_unshredded_case!(self, builder, metadata, index, false);
547
548        let raw = self.typed_value.value(index);
549        let variant = V::try_new_with_signed_scale(raw, self.scale)?;
550        builder.append_value(variant);
551        Ok(())
552    }
553}
554
555/// Builder for unshredding struct/object types with nested fields
556struct StructUnshredVariantBuilder<'a> {
557    value: Option<&'a arrow::array::BinaryViewArray>,
558    typed_value: &'a arrow::array::StructArray,
559    field_unshredders: IndexMap<&'a str, Option<UnshredVariantRowBuilder<'a>>>,
560}
561
562impl<'a> StructUnshredVariantBuilder<'a> {
563    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result<Self> {
564        // Create unshredders for each field in constructor
565        let mut field_unshredders = IndexMap::new();
566        for (field, field_array) in typed_value.fields().iter().zip(typed_value.columns()) {
567            // Factory returns None for None/None case -- these are missing fields we should skip
568            let Some(field_array) = field_array.as_struct_opt() else {
569                return Err(ArrowError::InvalidArgumentError(format!(
570                    "Invalid shredded variant object field: expected Struct, got {}",
571                    field_array.data_type()
572                )));
573            };
574            let field_unshredder = UnshredVariantRowBuilder::try_new_opt(field_array.try_into()?)?;
575            field_unshredders.insert(field.name().as_ref(), field_unshredder);
576        }
577
578        Ok(Self {
579            value,
580            typed_value,
581            field_unshredders,
582        })
583    }
584
585    fn append_row(
586        &mut self,
587        builder: &mut impl VariantBuilderExt,
588        metadata: &VariantMetadata,
589        index: usize,
590    ) -> Result<()> {
591        let value = handle_unshredded_case!(self, builder, metadata, index, true);
592
593        // If we get here, typed_value is valid and value may or may not be valid
594        let mut object_builder = builder.try_new_object()?;
595
596        // Process typed fields (skip empty builders that indicate missing fields)
597        for (field_name, field_unshredder_opt) in &mut self.field_unshredders {
598            if let Some(field_unshredder) = field_unshredder_opt {
599                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut object_builder);
600                field_unshredder.append_row(&mut field_builder, metadata, index)?;
601            }
602        }
603
604        // Process any unshredded fields (partial shredding)
605        if let Some(value) = value {
606            let Variant::Object(object) = value else {
607                return Err(ArrowError::InvalidArgumentError(
608                    "Expected object in value field for partially shredded struct".to_string(),
609                ));
610            };
611
612            for (field_name, field_value) in object.iter() {
613                if self.field_unshredders.contains_key(field_name) {
614                    return Err(ArrowError::InvalidArgumentError(format!(
615                        "Field '{field_name}' appears in both typed_value and value",
616                    )));
617                }
618                object_builder.insert_bytes(field_name, field_value);
619            }
620        }
621
622        object_builder.finish();
623        Ok(())
624    }
625}
626
627/// Builder for unshredding list/array types with recursive element processing
628struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
629    value: Option<&'a BinaryViewArray>,
630    typed_value: &'a L,
631    element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
632}
633
634impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
635    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result<Self> {
636        // Create a recursive unshredder for the list elements
637        // The element type comes from the values array of the list
638        let element_values = typed_value.values();
639
640        // For shredded lists, each element would be a ShreddedVariantFieldArray (struct)
641        // Extract value/typed_value from the element struct
642        let Some(element_values) = element_values.as_struct_opt() else {
643            return Err(ArrowError::InvalidArgumentError(format!(
644                "Invalid shredded variant array element: expected Struct, got {}",
645                element_values.data_type()
646            )));
647        };
648
649        // Create recursive unshredder for elements
650        //
651        // NOTE: A None/None array element is technically invalid, but the shredding spec
652        // requires us to emit `Variant::Null` when a required value is missing.
653        let element_unshredder = UnshredVariantRowBuilder::try_new_opt(element_values.try_into()?)?
654            .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
655
656        Ok(Self {
657            value,
658            typed_value,
659            element_unshredder: Box::new(element_unshredder),
660        })
661    }
662
663    fn append_row(
664        &mut self,
665        builder: &mut impl VariantBuilderExt,
666        metadata: &VariantMetadata,
667        index: usize,
668    ) -> Result<()> {
669        handle_unshredded_case!(self, builder, metadata, index, false);
670
671        // If we get here, typed_value is valid and value is NULL -- process the list elements
672        let mut list_builder = builder.try_new_list()?;
673        for element_index in self.typed_value.element_range(index) {
674            self.element_unshredder
675                .append_row(&mut list_builder, metadata, element_index)?;
676        }
677
678        list_builder.finish();
679        Ok(())
680    }
681}
682
#[cfg(test)]
mod tests {
    use crate::VariantArray;
    use arrow::array::{
        ArrayRef, BinaryArray, BinaryViewArray, LargeBinaryArray, LargeStringArray,
        StringViewArray,
    };
    use parquet_variant::Variant;
    use std::sync::Arc;

    /// Metadata bytes shared by every row of the fixtures below.
    const ROW_METADATA: &[u8] = &[0x01, 0x00, 0x00];

    /// String fixture shared by the string-typed tests.
    const STRINGS: [&str; 3] = ["hello", "middle", "world"];

    /// Binary fixture shared by the binary-typed tests.
    const BLOBS: [&[u8]; 3] = [b"\x00\x01\x02", b"\xff\xaa", b"\xde\xad\xbe\xef"];

    /// Wraps a three-row `typed_value` column (with no value column and no nulls) in a
    /// VariantArray and unshreds it.
    fn unshred_typed(typed_value: ArrayRef) -> VariantArray {
        let metadata = BinaryViewArray::from_iter_values(vec![ROW_METADATA; 3]);
        let shredded = VariantArray::from_parts(metadata, None, Some(typed_value), None);
        crate::unshred_variant(&shredded).unwrap()
    }

    /// Asserts that `result` holds exactly the string fixture.
    fn assert_strings(result: &VariantArray) {
        assert_eq!(result.len(), 3);
        for (row, expected) in STRINGS.into_iter().enumerate() {
            assert_eq!(result.value(row), Variant::from(expected));
        }
    }

    /// Asserts that `result` holds exactly the binary fixture.
    fn assert_blobs(result: &VariantArray) {
        assert_eq!(result.len(), 3);
        for (row, expected) in BLOBS.into_iter().enumerate() {
            assert_eq!(result.value(row), Variant::from(expected));
        }
    }

    #[test]
    fn test_unshred_utf8view_typed_value() {
        let typed_value: ArrayRef = Arc::new(StringViewArray::from(STRINGS.map(Some).to_vec()));
        assert_strings(&unshred_typed(typed_value));
    }

    #[test]
    fn test_unshred_largeutf8_typed_value() {
        let typed_value: ArrayRef = Arc::new(LargeStringArray::from(STRINGS.map(Some).to_vec()));
        assert_strings(&unshred_typed(typed_value));
    }

    #[test]
    fn test_unshred_binary_typed_value() {
        let typed_value: ArrayRef = Arc::new(BinaryArray::from_iter_values(BLOBS));
        assert_blobs(&unshred_typed(typed_value));
    }

    #[test]
    fn test_unshred_largebinary_typed_value() {
        let typed_value: ArrayRef = Arc::new(LargeBinaryArray::from_iter_values(BLOBS));
        assert_blobs(&unshred_typed(typed_value));
    }
}