1use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
21use arrow::array::{
22 Array, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeBinaryArray,
23 FixedSizeListArray, GenericListArray, GenericListViewArray, LargeBinaryArray, LargeStringArray,
24 ListLikeArray, PrimitiveArray, StringArray, StringViewArray, StructArray,
25};
26use arrow::buffer::NullBuffer;
27use arrow::datatypes::{
28 ArrowPrimitiveType, DataType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type,
29 DecimalType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
30 Time64MicrosecondType, TimeUnit, TimestampMicrosecondType, TimestampNanosecondType,
31};
32use arrow::error::{ArrowError, Result};
33use arrow::temporal_conversions::time64us_to_time;
34use chrono::{DateTime, Utc};
35use indexmap::IndexMap;
36use parquet_variant::{
37 ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
38 VariantDecimal16, VariantDecimalType, VariantMetadata,
39};
40use std::marker::PhantomData;
41use uuid::Uuid;
42
43pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
59 if array.typed_value_field().is_none() && array.value_field().is_some() {
61 return Ok(array.clone());
62 }
63
64 let nulls = array.nulls();
67 let mut row_builder = UnshredVariantRowBuilder::try_new_opt(array.shredding_state().borrow())?
68 .unwrap_or_else(|| UnshredVariantRowBuilder::null(nulls));
69
70 let metadata = array.metadata_field();
71 let mut value_builder = VariantValueArrayBuilder::new(array.len());
72 for i in 0..array.len() {
73 if array.is_null(i) {
74 value_builder.append_null();
75 } else {
76 let metadata = VariantMetadata::new(metadata.value(i));
77 let mut value_builder = value_builder.builder_ext(&metadata);
78 row_builder.append_row(&mut value_builder, &metadata, i)?;
79 }
80 }
81
82 let value = value_builder.build()?;
83 Ok(VariantArray::from_parts(
84 metadata.clone(),
85 Some(value),
86 None,
87 nulls.cloned(),
88 ))
89}
90
/// Row-by-row unshredder, with one variant per supported `typed_value` layout.
///
/// Each variant wraps a concrete builder; `append_row` dispatches to it with a
/// `match`. Variants are selected by `try_new_opt` from the Arrow data type of
/// the `typed_value` column.
enum UnshredVariantRowBuilder<'a> {
    // Integer and floating-point columns.
    PrimitiveInt8(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int8Type>>),
    PrimitiveInt16(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int16Type>>),
    PrimitiveInt32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int32Type>>),
    PrimitiveInt64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int64Type>>),
    PrimitiveFloat32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float32Type>>),
    PrimitiveFloat64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float64Type>>),
    // Decimal columns, paired with the variant decimal type of matching width.
    Decimal32(DecimalUnshredRowBuilder<'a, Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalUnshredRowBuilder<'a, Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalUnshredRowBuilder<'a, Decimal128Type, VariantDecimal16>),
    // Date, time and timestamp columns.
    PrimitiveDate32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Date32Type>>),
    PrimitiveTime64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Time64MicrosecondType>>),
    TimestampMicrosecond(TimestampUnshredRowBuilder<'a, TimestampMicrosecondType>),
    TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>),
    // Boolean, string and binary columns.
    PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>),
    PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
    PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>),
    PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>),
    PrimitiveBinary(UnshredPrimitiveRowBuilder<'a, BinaryArray>),
    PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
    PrimitiveLargeBinary(UnshredPrimitiveRowBuilder<'a, LargeBinaryArray>),
    // FixedSizeBinary(16) column, emitted as a UUID.
    PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
    // List-like columns; elements are unshredded recursively.
    List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListUnshredVariantBuilder<'a, FixedSizeListArray>),
    // Struct column; fields are unshredded recursively.
    Struct(StructUnshredVariantBuilder<'a>),
    // Only a `value` column is present (no `typed_value`).
    ValueOnly(ValueOnlyUnshredVariantBuilder<'a>),
    // Neither `value` nor `typed_value` is present.
    Null(NullUnshredVariantBuilder<'a>),
}
123
124impl<'a> UnshredVariantRowBuilder<'a> {
125 fn null(nulls: Option<&'a NullBuffer>) -> Self {
127 Self::Null(NullUnshredVariantBuilder::new(nulls))
128 }
129
130 fn append_row(
132 &mut self,
133 builder: &mut impl VariantBuilderExt,
134 metadata: &VariantMetadata,
135 index: usize,
136 ) -> Result<()> {
137 match self {
138 Self::PrimitiveInt8(b) => b.append_row(builder, metadata, index),
139 Self::PrimitiveInt16(b) => b.append_row(builder, metadata, index),
140 Self::PrimitiveInt32(b) => b.append_row(builder, metadata, index),
141 Self::PrimitiveInt64(b) => b.append_row(builder, metadata, index),
142 Self::PrimitiveFloat32(b) => b.append_row(builder, metadata, index),
143 Self::PrimitiveFloat64(b) => b.append_row(builder, metadata, index),
144 Self::Decimal32(b) => b.append_row(builder, metadata, index),
145 Self::Decimal64(b) => b.append_row(builder, metadata, index),
146 Self::Decimal128(b) => b.append_row(builder, metadata, index),
147 Self::PrimitiveDate32(b) => b.append_row(builder, metadata, index),
148 Self::PrimitiveTime64(b) => b.append_row(builder, metadata, index),
149 Self::TimestampMicrosecond(b) => b.append_row(builder, metadata, index),
150 Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index),
151 Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index),
152 Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
153 Self::PrimitiveStringView(b) => b.append_row(builder, metadata, index),
154 Self::PrimitiveLargeString(b) => b.append_row(builder, metadata, index),
155 Self::PrimitiveBinary(b) => b.append_row(builder, metadata, index),
156 Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index),
157 Self::PrimitiveLargeBinary(b) => b.append_row(builder, metadata, index),
158 Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
159 Self::List(b) => b.append_row(builder, metadata, index),
160 Self::LargeList(b) => b.append_row(builder, metadata, index),
161 Self::ListView(b) => b.append_row(builder, metadata, index),
162 Self::LargeListView(b) => b.append_row(builder, metadata, index),
163 Self::FixedSizeList(b) => b.append_row(builder, metadata, index),
164 Self::Struct(b) => b.append_row(builder, metadata, index),
165 Self::ValueOnly(b) => b.append_row(builder, metadata, index),
166 Self::Null(b) => b.append_row(builder, metadata, index),
167 }
168 }
169
170 fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result<Option<Self>> {
173 let value = shredding_state.value_field();
174 let typed_value = shredding_state.typed_value_field();
175 let Some(typed_value) = typed_value else {
176 return Ok(value.map(|v| Self::ValueOnly(ValueOnlyUnshredVariantBuilder::new(v))));
178 };
179
180 macro_rules! primitive_builder {
182 ($enum_variant:ident, $cast_fn:ident) => {
183 Self::$enum_variant(UnshredPrimitiveRowBuilder::new(
184 value,
185 typed_value.$cast_fn(),
186 ))
187 };
188 }
189
190 let builder = match typed_value.data_type() {
191 DataType::Int8 => primitive_builder!(PrimitiveInt8, as_primitive),
192 DataType::Int16 => primitive_builder!(PrimitiveInt16, as_primitive),
193 DataType::Int32 => primitive_builder!(PrimitiveInt32, as_primitive),
194 DataType::Int64 => primitive_builder!(PrimitiveInt64, as_primitive),
195 DataType::Float32 => primitive_builder!(PrimitiveFloat32, as_primitive),
196 DataType::Float64 => primitive_builder!(PrimitiveFloat64, as_primitive),
197 DataType::Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => {
198 Self::Decimal32(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
199 }
200 DataType::Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
201 Self::Decimal64(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
202 }
203 DataType::Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => {
204 Self::Decimal128(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
205 }
206 DataType::Decimal32(_, _)
207 | DataType::Decimal64(_, _)
208 | DataType::Decimal128(_, _)
209 | DataType::Decimal256(_, _) => {
210 return Err(ArrowError::InvalidArgumentError(format!(
211 "{} is not a valid variant shredding type",
212 typed_value.data_type()
213 )));
214 }
215 DataType::Date32 => primitive_builder!(PrimitiveDate32, as_primitive),
216 DataType::Time64(TimeUnit::Microsecond) => {
217 primitive_builder!(PrimitiveTime64, as_primitive)
218 }
219 DataType::Time64(time_unit) => {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Time64({time_unit}) is not a valid variant shredding type",
222 )));
223 }
224 DataType::Timestamp(TimeUnit::Microsecond, timezone) => Self::TimestampMicrosecond(
225 TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
226 ),
227 DataType::Timestamp(TimeUnit::Nanosecond, timezone) => Self::TimestampNanosecond(
228 TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
229 ),
230 DataType::Timestamp(time_unit, _) => {
231 return Err(ArrowError::InvalidArgumentError(format!(
232 "Timestamp({time_unit}) is not a valid variant shredding type",
233 )));
234 }
235 DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean),
236 DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
237 DataType::Utf8View => primitive_builder!(PrimitiveStringView, as_string_view),
238 DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string),
239 DataType::Binary => primitive_builder!(PrimitiveBinary, as_binary),
240 DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view),
241 DataType::LargeBinary => primitive_builder!(PrimitiveLargeBinary, as_binary),
242 DataType::FixedSizeBinary(16) => {
243 primitive_builder!(PrimitiveUuid, as_fixed_size_binary)
244 }
245 DataType::FixedSizeBinary(size) => {
246 return Err(ArrowError::InvalidArgumentError(format!(
247 "FixedSizeBinary({size}) is not a valid variant shredding type",
248 )));
249 }
250 DataType::Struct(_) => Self::Struct(StructUnshredVariantBuilder::try_new(
251 value,
252 typed_value.as_struct(),
253 )?),
254 DataType::List(_) => Self::List(ListUnshredVariantBuilder::try_new(
255 value,
256 typed_value.as_list(),
257 )?),
258 DataType::LargeList(_) => Self::LargeList(ListUnshredVariantBuilder::try_new(
259 value,
260 typed_value.as_list(),
261 )?),
262 DataType::ListView(_) => Self::ListView(ListUnshredVariantBuilder::try_new(
263 value,
264 typed_value.as_list_view(),
265 )?),
266 DataType::LargeListView(_) => Self::LargeListView(ListUnshredVariantBuilder::try_new(
267 value,
268 typed_value.as_list_view(),
269 )?),
270 DataType::FixedSizeList(_, _) => Self::FixedSizeList(
271 ListUnshredVariantBuilder::try_new(value, typed_value.as_fixed_size_list())?,
272 ),
273 _ => {
274 return Err(ArrowError::NotYetImplemented(format!(
275 "Unshredding not yet supported for type: {}",
276 typed_value.data_type()
277 )));
278 }
279 };
280 Ok(Some(builder))
281 }
282}
283
284struct NullUnshredVariantBuilder<'a> {
286 nulls: Option<&'a NullBuffer>,
287}
288
289impl<'a> NullUnshredVariantBuilder<'a> {
290 fn new(nulls: Option<&'a NullBuffer>) -> Self {
291 Self { nulls }
292 }
293
294 fn append_row(
295 &mut self,
296 builder: &mut impl VariantBuilderExt,
297 _metadata: &VariantMetadata,
298 index: usize,
299 ) -> Result<()> {
300 if self.nulls.is_some_and(|nulls| nulls.is_null(index)) {
301 builder.append_null();
302 } else {
303 builder.append_value(Variant::Null);
304 }
305 Ok(())
306 }
307}
308
309struct ValueOnlyUnshredVariantBuilder<'a> {
311 value: &'a arrow::array::BinaryViewArray,
312}
313
314impl<'a> ValueOnlyUnshredVariantBuilder<'a> {
315 fn new(value: &'a BinaryViewArray) -> Self {
316 Self { value }
317 }
318
319 fn append_row(
320 &mut self,
321 builder: &mut impl VariantBuilderExt,
322 metadata: &VariantMetadata,
323 index: usize,
324 ) -> Result<()> {
325 if self.value.is_null(index) {
326 builder.append_null();
327 } else {
328 let variant = Variant::new_with_metadata(metadata.clone(), self.value.value(index));
329 builder.append_value(variant);
330 }
331 Ok(())
332 }
333}
334
/// Arrow arrays whose elements can be appended to a variant builder.
///
/// The implementations in this file read `self.value(index)` without a null
/// check; the row builders that call them first rule out a null `typed_value`
/// slot via `handle_unshredded_case!`.
trait AppendToVariantBuilder: Array {
    /// Appends the element at `index` to `builder`, returning an error when the
    /// stored value cannot be converted.
    fn append_to_variant_builder(
        &self,
        builder: &mut impl VariantBuilderExt,
        index: usize,
    ) -> Result<()>;
}
344
// Shared prologue of every `append_row` that has both a `value` and a
// `typed_value` column. Must be expanded inside a function returning
// `Result<()>` because it early-returns.
//
// * Decodes the row's `value` entry (if the column exists and the slot is valid).
// * If `typed_value` is null at `$index`: appends that decoded value (or a null
//   when there is none) and returns `Ok(())` from the enclosing function.
// * If both are present and `$partial_shredding` is false: returns an error.
// * Otherwise evaluates to the decoded `Option<Variant>` for the caller.
macro_rules! handle_unshredded_case {
    ($self:expr, $builder:expr, $metadata:expr, $index:expr, $partial_shredding:expr) => {{
        // Decode eagerly, before looking at `typed_value`.
        let value = match $self.value {
            Some(raw) if raw.is_valid($index) => Some(Variant::new_with_metadata(
                $metadata.clone(),
                raw.value($index),
            )),
            _ => None,
        };

        if $self.typed_value.is_null($index) {
            if let Some(value) = value {
                $builder.append_value(value);
            } else {
                $builder.append_null();
            }
            return Ok(());
        }

        if value.is_some() && !$partial_shredding {
            return Err(ArrowError::InvalidArgumentError(
                "Invalid shredded variant: both value and typed_value are non-null".to_string(),
            ));
        }

        value
    }};
}
372
373struct UnshredPrimitiveRowBuilder<'a, T> {
375 value: Option<&'a BinaryViewArray>,
376 typed_value: &'a T,
377}
378
379impl<'a, T: AppendToVariantBuilder> UnshredPrimitiveRowBuilder<'a, T> {
380 fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a T) -> Self {
381 Self { value, typed_value }
382 }
383
384 fn append_row(
385 &mut self,
386 builder: &mut impl VariantBuilderExt,
387 metadata: &VariantMetadata,
388 index: usize,
389 ) -> Result<()> {
390 handle_unshredded_case!(self, builder, metadata, index, false);
391
392 self.typed_value.append_to_variant_builder(builder, index)
394 }
395}
396
// Implements `AppendToVariantBuilder` for an array type.
//
// * `impl_append_to_variant_builder!(ArrayType)` appends `self.value(index)` as-is.
// * `impl_append_to_variant_builder!(ArrayType, |v| expr)` binds the element to
//   `v` and appends `expr`; `expr` runs inside a function returning `Result<()>`,
//   so it may use `?`.
macro_rules! impl_append_to_variant_builder {
    ($array_type:ty) => {
        impl_append_to_variant_builder!($array_type, |element| element);
    };
    ($array_type:ty, |$v:ident| $transform:expr) => {
        impl AppendToVariantBuilder for $array_type {
            fn append_to_variant_builder(
                &self,
                builder: &mut impl VariantBuilderExt,
                index: usize,
            ) -> Result<()> {
                let $v = self.value(index);
                builder.append_value($transform);
                Ok(())
            }
        }
    };
}
417
// Array types whose `value(index)` result is appended to the builder unchanged
// (no transform closure is supplied to the macro).
impl_append_to_variant_builder!(BooleanArray);
impl_append_to_variant_builder!(StringArray);
impl_append_to_variant_builder!(StringViewArray);
impl_append_to_variant_builder!(LargeStringArray);
impl_append_to_variant_builder!(BinaryArray);
impl_append_to_variant_builder!(BinaryViewArray);
impl_append_to_variant_builder!(LargeBinaryArray);
impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int16Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int32Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int64Type>);
impl_append_to_variant_builder!(PrimitiveArray<Float32Type>);
impl_append_to_variant_builder!(PrimitiveArray<Float64Type>);
431
432impl_append_to_variant_builder!(PrimitiveArray<Date32Type>, |days_since_epoch| {
433 Date32Type::to_naive_date_opt(days_since_epoch).ok_or_else(|| {
434 ArrowError::InvalidArgumentError(format!("Invalid Date32 value: {days_since_epoch}"))
435 })?
436});
437
438impl_append_to_variant_builder!(
439 PrimitiveArray<Time64MicrosecondType>,
440 |micros_since_midnight| {
441 time64us_to_time(micros_since_midnight).ok_or_else(|| {
442 ArrowError::InvalidArgumentError(format!(
443 "Invalid Time64 microsecond value: {micros_since_midnight}"
444 ))
445 })?
446 }
447);
448
449impl_append_to_variant_builder!(FixedSizeBinaryArray, |bytes| {
452 Uuid::from_slice(bytes).unwrap()
453});
454
/// Arrow timestamp types (native representation `i64`) whose raw values can be
/// converted to a UTC `DateTime`.
trait TimestampType: ArrowPrimitiveType<Native = i64> {
    /// Converts the raw timestamp `value`, expressed in the implementing type's
    /// unit, into a `DateTime<Utc>`; implementations may reject values they
    /// cannot represent.
    fn to_datetime_utc(value: i64) -> Result<DateTime<Utc>>;
}
459
460impl TimestampType for TimestampMicrosecondType {
461 fn to_datetime_utc(micros: i64) -> Result<DateTime<Utc>> {
462 DateTime::from_timestamp_micros(micros).ok_or_else(|| {
463 ArrowError::InvalidArgumentError(format!(
464 "Invalid timestamp microsecond value: {micros}"
465 ))
466 })
467 }
468}
469
470impl TimestampType for TimestampNanosecondType {
471 fn to_datetime_utc(nanos: i64) -> Result<DateTime<Utc>> {
472 Ok(DateTime::from_timestamp_nanos(nanos))
473 }
474}
475
476struct TimestampUnshredRowBuilder<'a, T: TimestampType> {
478 value: Option<&'a BinaryViewArray>,
479 typed_value: &'a PrimitiveArray<T>,
480 has_timezone: bool,
481}
482
483impl<'a, T: TimestampType> TimestampUnshredRowBuilder<'a, T> {
484 fn new(
485 value: Option<&'a BinaryViewArray>,
486 typed_value: &'a dyn Array,
487 has_timezone: bool,
488 ) -> Self {
489 Self {
490 value,
491 typed_value: typed_value.as_primitive(),
492 has_timezone,
493 }
494 }
495
496 fn append_row(
497 &mut self,
498 builder: &mut impl VariantBuilderExt,
499 metadata: &VariantMetadata,
500 index: usize,
501 ) -> Result<()> {
502 handle_unshredded_case!(self, builder, metadata, index, false);
503
504 let timestamp_value = self.typed_value.value(index);
506 let dt = T::to_datetime_utc(timestamp_value)?;
507 if self.has_timezone {
508 builder.append_value(dt);
509 } else {
510 builder.append_value(dt.naive_utc());
511 }
512 Ok(())
513 }
514}
515
516struct DecimalUnshredRowBuilder<'a, A: DecimalType, V>
518where
519 V: VariantDecimalType<Native = A::Native>,
520{
521 value: Option<&'a BinaryViewArray>,
522 typed_value: &'a PrimitiveArray<A>,
523 scale: i8,
524 _phantom: PhantomData<V>,
525}
526
527impl<'a, A: DecimalType, V> DecimalUnshredRowBuilder<'a, A, V>
528where
529 V: VariantDecimalType<Native = A::Native>,
530{
531 fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a dyn Array, scale: i8) -> Self {
532 Self {
533 value,
534 typed_value: typed_value.as_primitive(),
535 scale,
536 _phantom: PhantomData,
537 }
538 }
539
540 fn append_row(
541 &mut self,
542 builder: &mut impl VariantBuilderExt,
543 metadata: &VariantMetadata,
544 index: usize,
545 ) -> Result<()> {
546 handle_unshredded_case!(self, builder, metadata, index, false);
547
548 let raw = self.typed_value.value(index);
549 let variant = V::try_new_with_signed_scale(raw, self.scale)?;
550 builder.append_value(variant);
551 Ok(())
552 }
553}
554
555struct StructUnshredVariantBuilder<'a> {
557 value: Option<&'a arrow::array::BinaryViewArray>,
558 typed_value: &'a arrow::array::StructArray,
559 field_unshredders: IndexMap<&'a str, Option<UnshredVariantRowBuilder<'a>>>,
560}
561
562impl<'a> StructUnshredVariantBuilder<'a> {
563 fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result<Self> {
564 let mut field_unshredders = IndexMap::new();
566 for (field, field_array) in typed_value.fields().iter().zip(typed_value.columns()) {
567 let Some(field_array) = field_array.as_struct_opt() else {
569 return Err(ArrowError::InvalidArgumentError(format!(
570 "Invalid shredded variant object field: expected Struct, got {}",
571 field_array.data_type()
572 )));
573 };
574 let field_unshredder = UnshredVariantRowBuilder::try_new_opt(field_array.try_into()?)?;
575 field_unshredders.insert(field.name().as_ref(), field_unshredder);
576 }
577
578 Ok(Self {
579 value,
580 typed_value,
581 field_unshredders,
582 })
583 }
584
585 fn append_row(
586 &mut self,
587 builder: &mut impl VariantBuilderExt,
588 metadata: &VariantMetadata,
589 index: usize,
590 ) -> Result<()> {
591 let value = handle_unshredded_case!(self, builder, metadata, index, true);
592
593 let mut object_builder = builder.try_new_object()?;
595
596 for (field_name, field_unshredder_opt) in &mut self.field_unshredders {
598 if let Some(field_unshredder) = field_unshredder_opt {
599 let mut field_builder = ObjectFieldBuilder::new(field_name, &mut object_builder);
600 field_unshredder.append_row(&mut field_builder, metadata, index)?;
601 }
602 }
603
604 if let Some(value) = value {
606 let Variant::Object(object) = value else {
607 return Err(ArrowError::InvalidArgumentError(
608 "Expected object in value field for partially shredded struct".to_string(),
609 ));
610 };
611
612 for (field_name, field_value) in object.iter() {
613 if self.field_unshredders.contains_key(field_name) {
614 return Err(ArrowError::InvalidArgumentError(format!(
615 "Field '{field_name}' appears in both typed_value and value",
616 )));
617 }
618 object_builder.insert_bytes(field_name, field_value);
619 }
620 }
621
622 object_builder.finish();
623 Ok(())
624 }
625}
626
627struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
629 value: Option<&'a BinaryViewArray>,
630 typed_value: &'a L,
631 element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
632}
633
634impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
635 fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result<Self> {
636 let element_values = typed_value.values();
639
640 let Some(element_values) = element_values.as_struct_opt() else {
643 return Err(ArrowError::InvalidArgumentError(format!(
644 "Invalid shredded variant array element: expected Struct, got {}",
645 element_values.data_type()
646 )));
647 };
648
649 let element_unshredder = UnshredVariantRowBuilder::try_new_opt(element_values.try_into()?)?
654 .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
655
656 Ok(Self {
657 value,
658 typed_value,
659 element_unshredder: Box::new(element_unshredder),
660 })
661 }
662
663 fn append_row(
664 &mut self,
665 builder: &mut impl VariantBuilderExt,
666 metadata: &VariantMetadata,
667 index: usize,
668 ) -> Result<()> {
669 handle_unshredded_case!(self, builder, metadata, index, false);
670
671 let mut list_builder = builder.try_new_list()?;
673 for element_index in self.typed_value.element_range(index) {
674 self.element_unshredder
675 .append_row(&mut list_builder, metadata, element_index)?;
676 }
677
678 list_builder.finish();
679 Ok(())
680 }
681}
682
#[cfg(test)]
mod tests {
    use crate::VariantArray;
    use arrow::array::{
        ArrayRef, BinaryArray, BinaryViewArray, LargeBinaryArray, LargeStringArray,
        StringViewArray,
    };
    use parquet_variant::Variant;
    use std::sync::Arc;

    /// Metadata bytes shared by every row of the fixtures below.
    const METADATA_BYTES: &[u8] = &[0x01, 0x00, 0x00];
    /// String fixture, one entry per row.
    const STRINGS: [&str; 3] = ["hello", "middle", "world"];
    /// Binary fixture, one entry per row.
    const BLOBS: [&[u8]; 3] = [b"\x00\x01\x02", b"\xff\xaa", b"\xde\xad\xbe\xef"];

    /// Builds a three-row variant array that has only a `typed_value` column
    /// and runs it through `unshred_variant`.
    fn unshred_typed_only(typed_value: ArrayRef) -> VariantArray {
        let metadata = BinaryViewArray::from_iter_values(vec![METADATA_BYTES; 3]);
        let shredded = VariantArray::from_parts(metadata, None, Some(typed_value), None);
        crate::unshred_variant(&shredded).unwrap()
    }

    /// Checks that `result` holds exactly the `STRINGS` fixture.
    fn assert_string_rows(result: &VariantArray) {
        assert_eq!(result.len(), 3);
        for (row, expected) in STRINGS.iter().enumerate() {
            assert_eq!(result.value(row), Variant::from(*expected));
        }
    }

    /// Checks that `result` holds exactly the `BLOBS` fixture.
    fn assert_binary_rows(result: &VariantArray) {
        assert_eq!(result.len(), 3);
        for (row, expected) in BLOBS.iter().enumerate() {
            assert_eq!(result.value(row), Variant::from(*expected));
        }
    }

    #[test]
    fn test_unshred_utf8view_typed_value() {
        let typed_value = StringViewArray::from(STRINGS.map(Some).to_vec());
        let result = unshred_typed_only(Arc::new(typed_value));
        assert_string_rows(&result);
    }

    #[test]
    fn test_unshred_largeutf8_typed_value() {
        let typed_value = LargeStringArray::from(STRINGS.map(Some).to_vec());
        let result = unshred_typed_only(Arc::new(typed_value));
        assert_string_rows(&result);
    }

    #[test]
    fn test_unshred_binary_typed_value() {
        let typed_value = BinaryArray::from_iter_values(BLOBS.to_vec());
        let result = unshred_typed_only(Arc::new(typed_value));
        assert_binary_rows(&result);
    }

    #[test]
    fn test_unshred_largebinary_typed_value() {
        let typed_value = LargeBinaryArray::from_iter_values(BLOBS.to_vec());
        let result = unshred_typed_only(Arc::new(typed_value));
        assert_binary_rows(&result);
    }
}