1use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
21use crate::variant_to_arrow::{
22 PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
23};
24use crate::{VariantArray, VariantValueArrayBuilder};
25use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
26use arrow::buffer::NullBuffer;
27use arrow::compute::CastOptions;
28use arrow::datatypes::{DataType, Fields, TimeUnit};
29use arrow::error::{ArrowError, Result};
30use parquet_variant::{Variant, VariantBuilderExt};
31
32use indexmap::IndexMap;
33use std::sync::Arc;
34
35pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result<VariantArray> {
67 if array.typed_value_field().is_some() {
68 return Err(ArrowError::InvalidArgumentError(
69 "Input is already shredded".to_string(),
70 ));
71 }
72
73 if array.value_field().is_none() {
74 return Ok(array.clone());
76 };
77
78 let cast_options = CastOptions::default();
79 let mut builder = make_variant_to_shredded_variant_arrow_row_builder(
80 as_type,
81 &cast_options,
82 array.len(),
83 true,
84 )?;
85 for i in 0..array.len() {
86 if array.is_null(i) {
87 builder.append_null()?;
88 } else {
89 builder.append_value(array.value(i))?;
90 }
91 }
92 let (value, typed_value, nulls) = builder.finish()?;
93 Ok(VariantArray::from_parts(
94 array.metadata_field().clone(),
95 Some(value),
96 Some(typed_value),
97 nulls,
98 ))
99}
100
101pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
102 data_type: &'a DataType,
103 cast_options: &'a CastOptions,
104 capacity: usize,
105 top_level: bool,
106) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
107 let builder = match data_type {
108 DataType::Struct(fields) => {
109 let typed_value_builder = VariantToShreddedObjectVariantRowBuilder::try_new(
110 fields,
111 cast_options,
112 capacity,
113 top_level,
114 )?;
115 VariantToShreddedVariantRowBuilder::Object(typed_value_builder)
116 }
117 DataType::List(_)
118 | DataType::LargeList(_)
119 | DataType::ListView(_)
120 | DataType::LargeListView(_)
121 | DataType::FixedSizeList(..) => {
122 return Err(ArrowError::NotYetImplemented(
123 "Shredding variant array values as arrow lists".to_string(),
124 ));
125 }
126 DataType::Boolean
129 | DataType::Int8
130 | DataType::Int16
131 | DataType::Int32
132 | DataType::Int64
133 | DataType::Float32
134 | DataType::Float64
135 | DataType::Decimal32(..)
136 | DataType::Decimal64(..)
137 | DataType::Decimal128(..)
138 | DataType::Date32
139 | DataType::Time64(TimeUnit::Microsecond)
140 | DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _)
141 | DataType::Binary
142 | DataType::BinaryView
143 | DataType::Utf8
144 | DataType::Utf8View
145 | DataType::FixedSizeBinary(16) => {
147 let builder =
148 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
149 let typed_value_builder =
150 VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level);
151 VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
152 }
153 DataType::FixedSizeBinary(_) => {
154 return Err(ArrowError::InvalidArgumentError(format!("{data_type} is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is supported.")))
155 }
156 _ => {
157 return Err(ArrowError::InvalidArgumentError(format!("{data_type} is not a valid variant shredding type")))
158 }
159 };
160 Ok(builder)
161}
162
163pub(crate) enum VariantToShreddedVariantRowBuilder<'a> {
164 Primitive(VariantToShreddedPrimitiveVariantRowBuilder<'a>),
165 Object(VariantToShreddedObjectVariantRowBuilder<'a>),
166}
167impl<'a> VariantToShreddedVariantRowBuilder<'a> {
168 pub fn append_null(&mut self) -> Result<()> {
169 use VariantToShreddedVariantRowBuilder::*;
170 match self {
171 Primitive(b) => b.append_null(),
172 Object(b) => b.append_null(),
173 }
174 }
175
176 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
177 use VariantToShreddedVariantRowBuilder::*;
178 match self {
179 Primitive(b) => b.append_value(value),
180 Object(b) => b.append_value(value),
181 }
182 }
183
184 pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
185 use VariantToShreddedVariantRowBuilder::*;
186 match self {
187 Primitive(b) => b.finish(),
188 Object(b) => b.finish(),
189 }
190 }
191}
192
193pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> {
195 value_builder: VariantValueArrayBuilder,
196 typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
197 nulls: NullBufferBuilder,
198 top_level: bool,
199}
200
201impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
202 pub(crate) fn new(
203 typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
204 capacity: usize,
205 top_level: bool,
206 ) -> Self {
207 Self {
208 value_builder: VariantValueArrayBuilder::new(capacity),
209 typed_value_builder,
210 nulls: NullBufferBuilder::new(capacity),
211 top_level,
212 }
213 }
214 fn append_null(&mut self) -> Result<()> {
215 self.nulls.append(!self.top_level);
218 self.value_builder.append_null();
219 self.typed_value_builder.append_null()
220 }
221 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
222 self.nulls.append_non_null();
223 if self.typed_value_builder.append_value(&value)? {
224 self.value_builder.append_null();
225 } else {
226 self.value_builder.append_value(value);
227 }
228 Ok(true)
229 }
230 fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
231 Ok((
232 self.value_builder.build()?,
233 self.typed_value_builder.finish()?,
234 self.nulls.finish(),
235 ))
236 }
237}
238
239pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
240 value_builder: VariantValueArrayBuilder,
241 typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,
242 typed_value_nulls: NullBufferBuilder,
243 nulls: NullBufferBuilder,
244 top_level: bool,
245}
246
247impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
248 fn try_new(
249 fields: &'a Fields,
250 cast_options: &'a CastOptions,
251 capacity: usize,
252 top_level: bool,
253 ) -> Result<Self> {
254 let typed_value_builders = fields.iter().map(|field| {
255 let builder = make_variant_to_shredded_variant_arrow_row_builder(
256 field.data_type(),
257 cast_options,
258 capacity,
259 false,
260 )?;
261 Ok((field.name().as_str(), builder))
262 });
263 Ok(Self {
264 value_builder: VariantValueArrayBuilder::new(capacity),
265 typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
266 typed_value_nulls: NullBufferBuilder::new(capacity),
267 nulls: NullBufferBuilder::new(capacity),
268 top_level,
269 })
270 }
271
272 fn append_null(&mut self) -> Result<()> {
273 self.nulls.append(!self.top_level);
276 self.value_builder.append_null();
277 self.typed_value_nulls.append_null();
278 for (_, typed_value_builder) in &mut self.typed_value_builders {
279 typed_value_builder.append_null()?;
280 }
281 Ok(())
282 }
283 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
284 let Variant::Object(ref obj) = value else {
285 self.nulls.append_non_null();
287 self.value_builder.append_value(value);
288 self.typed_value_nulls.append_null();
289 for (_, typed_value_builder) in &mut self.typed_value_builders {
290 typed_value_builder.append_null()?;
291 }
292 return Ok(false);
293 };
294
295 let mut builder = self.value_builder.builder_ext(value.metadata());
297 let mut object_builder = builder.try_new_object()?;
298 let mut seen = std::collections::HashSet::new();
299 let mut partially_shredded = false;
300 for (field_name, value) in obj.iter() {
301 match self.typed_value_builders.get_mut(field_name) {
302 Some(typed_value_builder) => {
303 typed_value_builder.append_value(value)?;
304 seen.insert(field_name);
305 }
306 None => {
307 object_builder.insert_bytes(field_name, value);
308 partially_shredded = true;
309 }
310 }
311 }
312
313 for (field_name, typed_value_builder) in &mut self.typed_value_builders {
315 if !seen.contains(field_name) {
316 typed_value_builder.append_null()?;
317 }
318 }
319
320 if partially_shredded {
322 object_builder.finish();
323 } else {
324 drop(object_builder);
325 self.value_builder.append_null();
326 }
327
328 self.typed_value_nulls.append_non_null();
329 self.nulls.append_non_null();
330 Ok(true)
331 }
332 fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
333 let mut builder = StructArrayBuilder::new();
334 for (field_name, typed_value_builder) in self.typed_value_builders {
335 let (value, typed_value, nulls) = typed_value_builder.finish()?;
336 let array =
337 ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
338 builder = builder.with_field(field_name, ArrayRef::from(array), false);
339 }
340 if let Some(nulls) = self.typed_value_nulls.finish() {
341 builder = builder.with_nulls(nulls);
342 }
343 Ok((
344 self.value_builder.build()?,
345 Arc::new(builder.build()),
346 self.nulls.finish(),
347 ))
348 }
349}
350
351#[cfg(test)]
352mod tests {
353 use super::*;
354 use crate::VariantArrayBuilder;
355 use arrow::array::{Array, FixedSizeBinaryArray, Float64Array, Int64Array};
356 use arrow::datatypes::{DataType, Field, Fields, TimeUnit, UnionFields, UnionMode};
357 use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant, VariantBuilder};
358 use std::sync::Arc;
359 use uuid::Uuid;
360
361 #[test]
362 fn test_already_shredded_input_error() {
363 let temp_array = VariantArray::from_iter(vec![Some(Variant::from("test"))]);
366 let metadata = temp_array.metadata_field().clone();
367 let value = temp_array.value_field().unwrap().clone();
368 let typed_value = Arc::new(Int64Array::from(vec![42])) as ArrayRef;
369
370 let shredded_array =
371 VariantArray::from_parts(metadata, Some(value), Some(typed_value), None);
372
373 let result = shred_variant(&shredded_array, &DataType::Int64);
374 assert!(matches!(
375 result.unwrap_err(),
376 ArrowError::InvalidArgumentError(_)
377 ));
378 }
379
380 #[test]
381 fn test_all_null_input() {
382 let metadata = BinaryViewArray::from_iter_values([&[1u8, 0u8]]); let all_null_array = VariantArray::from_parts(metadata, None, None, None);
385 let result = shred_variant(&all_null_array, &DataType::Int64).unwrap();
386
387 assert!(result.value_field().is_none());
389 assert!(result.typed_value_field().is_none());
390 }
391
392 #[test]
393 fn test_unsupported_list_schema() {
394 let input = VariantArray::from_iter([Variant::from(42)]);
395 let list_schema = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
396 shred_variant(&input, &list_schema).expect_err("unsupported");
397 }
398
399 #[test]
400 fn test_invalid_fixed_size_binary_shredding() {
401 let mock_uuid_1 = Uuid::new_v4();
402
403 let input = VariantArray::from_iter([Some(Variant::from(mock_uuid_1)), None]);
404
405 let err = shred_variant(&input, &DataType::FixedSizeBinary(17)).unwrap_err();
407
408 assert_eq!(
409 err.to_string(),
410 "Invalid argument error: FixedSizeBinary(17) is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is supported."
411 );
412 }
413
414 #[test]
415 fn test_uuid_shredding() {
416 let mock_uuid_1 = Uuid::new_v4();
417 let mock_uuid_2 = Uuid::new_v4();
418
419 let input = VariantArray::from_iter([
420 Some(Variant::from(mock_uuid_1)),
421 None,
422 Some(Variant::from(false)),
423 Some(Variant::from(mock_uuid_2)),
424 ]);
425
426 let variant_array = shred_variant(&input, &DataType::FixedSizeBinary(16)).unwrap();
427
428 let uuids = variant_array
444 .typed_value_field()
445 .unwrap()
446 .as_any()
447 .downcast_ref::<FixedSizeBinaryArray>()
448 .unwrap();
449
450 assert_eq!(uuids.len(), 4);
451
452 assert!(!uuids.is_null(0));
453
454 let got_uuid_1: &[u8] = uuids.value(0);
455 assert_eq!(got_uuid_1, mock_uuid_1.as_bytes());
456
457 assert!(uuids.is_null(1));
458 assert!(uuids.is_null(2));
459
460 assert!(!uuids.is_null(3));
461
462 let got_uuid_2: &[u8] = uuids.value(3);
463 assert_eq!(got_uuid_2, mock_uuid_2.as_bytes());
464 }
465
466 #[test]
467 fn test_primitive_shredding_comprehensive() {
468 let input = VariantArray::from_iter(vec![
470 Some(Variant::from(42i64)), Some(Variant::from("hello")), Some(Variant::from(100i64)), None, Some(Variant::Null), Some(Variant::from(3i8)), ]);
477
478 let result = shred_variant(&input, &DataType::Int64).unwrap();
479
480 let metadata_field = result.metadata_field();
482 let value_field = result.value_field().unwrap();
483 let typed_value_field = result
484 .typed_value_field()
485 .unwrap()
486 .as_any()
487 .downcast_ref::<Int64Array>()
488 .unwrap();
489
490 assert_eq!(result.len(), 6);
492
493 assert!(!result.is_null(0));
495 assert!(value_field.is_null(0)); assert!(!typed_value_field.is_null(0));
497 assert_eq!(typed_value_field.value(0), 42);
498
499 assert!(!result.is_null(1));
501 assert!(!value_field.is_null(1)); assert!(typed_value_field.is_null(1)); assert_eq!(
504 Variant::new(metadata_field.value(1), value_field.value(1)),
505 Variant::from("hello")
506 );
507
508 assert!(!result.is_null(2));
510 assert!(value_field.is_null(2));
511 assert_eq!(typed_value_field.value(2), 100);
512
513 assert!(result.is_null(3));
515
516 assert!(!result.is_null(4));
518 assert!(!value_field.is_null(4)); assert_eq!(
520 Variant::new(metadata_field.value(4), value_field.value(4)),
521 Variant::Null
522 );
523 assert!(typed_value_field.is_null(4));
524
525 assert!(!result.is_null(5));
527 assert!(value_field.is_null(5)); assert!(!typed_value_field.is_null(5));
529 assert_eq!(typed_value_field.value(5), 3);
530 }
531
532 #[test]
533 fn test_primitive_different_target_types() {
534 let input = VariantArray::from_iter(vec![
535 Variant::from(42i32),
536 Variant::from(3.15f64),
537 Variant::from("not_a_number"),
538 ]);
539
540 let result_int32 = shred_variant(&input, &DataType::Int32).unwrap();
542 let typed_value_int32 = result_int32
543 .typed_value_field()
544 .unwrap()
545 .as_any()
546 .downcast_ref::<arrow::array::Int32Array>()
547 .unwrap();
548 assert_eq!(typed_value_int32.value(0), 42);
549 assert!(typed_value_int32.is_null(1)); assert!(typed_value_int32.is_null(2)); let result_float64 = shred_variant(&input, &DataType::Float64).unwrap();
554 let typed_value_float64 = result_float64
555 .typed_value_field()
556 .unwrap()
557 .as_any()
558 .downcast_ref::<Float64Array>()
559 .unwrap();
560 assert_eq!(typed_value_float64.value(0), 42.0); assert_eq!(typed_value_float64.value(1), 3.15);
562 assert!(typed_value_float64.is_null(2)); }
564
565 #[test]
566 fn test_invalid_shredded_types_rejected() {
567 let input = VariantArray::from_iter([Variant::from(42)]);
568
569 let invalid_types = vec![
570 DataType::UInt8,
571 DataType::Float16,
572 DataType::Decimal256(38, 10),
573 DataType::Date64,
574 DataType::Time32(TimeUnit::Second),
575 DataType::Time64(TimeUnit::Nanosecond),
576 DataType::Timestamp(TimeUnit::Millisecond, None),
577 DataType::LargeBinary,
578 DataType::LargeUtf8,
579 DataType::FixedSizeBinary(17),
580 DataType::Union(
581 UnionFields::new(
582 vec![0_i8, 1_i8],
583 vec![
584 Field::new("int_field", DataType::Int32, false),
585 Field::new("str_field", DataType::Utf8, true),
586 ],
587 ),
588 UnionMode::Dense,
589 ),
590 DataType::Map(
591 Arc::new(Field::new(
592 "entries",
593 DataType::Struct(Fields::from(vec![
594 Field::new("key", DataType::Utf8, false),
595 Field::new("value", DataType::Int32, true),
596 ])),
597 false,
598 )),
599 false,
600 ),
601 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
602 DataType::RunEndEncoded(
603 Arc::new(Field::new("run_ends", DataType::Int32, false)),
604 Arc::new(Field::new("values", DataType::Utf8, true)),
605 ),
606 ];
607
608 for data_type in invalid_types {
609 let err = shred_variant(&input, &data_type).unwrap_err();
610 assert!(
611 matches!(err, ArrowError::InvalidArgumentError(_)),
612 "expected InvalidArgumentError for {:?}, got {:?}",
613 data_type,
614 err
615 );
616 }
617 }
618
619 #[test]
620 fn test_object_shredding_comprehensive() {
621 let mut builder = VariantArrayBuilder::new(7);
622
623 builder
625 .new_object()
626 .with_field("score", 95.5f64)
627 .with_field("age", 30i64)
628 .finish();
629
630 builder
632 .new_object()
633 .with_field("score", 87.2f64)
634 .with_field("age", 25i64)
635 .with_field("email", "bob@example.com")
636 .finish();
637
638 builder.new_object().with_field("age", 35i64).finish();
640
641 builder
643 .new_object()
644 .with_field("score", "ninety-five")
645 .with_field("age", "thirty")
646 .finish();
647
648 builder.append_variant(Variant::from("not an object"));
650
651 builder.new_object().finish();
653
654 builder.append_null();
656
657 builder.new_object().with_field("foo", 10).finish();
659
660 builder
662 .new_object()
663 .with_field("score", 66.67f64)
664 .with_field("foo", 10)
665 .finish();
666
667 let input = builder.build();
668
669 let fields = Fields::from(vec![
672 Field::new("score", DataType::Float64, true),
673 Field::new("age", DataType::Int64, true),
674 ]);
675 let target_schema = DataType::Struct(fields);
676
677 let result = shred_variant(&input, &target_schema).unwrap();
678
679 assert!(result.value_field().is_some());
681 assert!(result.typed_value_field().is_some());
682 assert_eq!(result.len(), 9);
683
684 let metadata = result.metadata_field();
685
686 let value = result.value_field().unwrap();
687 let typed_value = result
688 .typed_value_field()
689 .unwrap()
690 .as_any()
691 .downcast_ref::<arrow::array::StructArray>()
692 .unwrap();
693
694 let score_field =
696 ShreddedVariantFieldArray::try_new(typed_value.column_by_name("score").unwrap())
697 .unwrap();
698 let age_field =
699 ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap();
700
701 let score_value = score_field
702 .value_field()
703 .unwrap()
704 .as_any()
705 .downcast_ref::<BinaryViewArray>()
706 .unwrap();
707 let score_typed_value = score_field
708 .typed_value_field()
709 .unwrap()
710 .as_any()
711 .downcast_ref::<Float64Array>()
712 .unwrap();
713 let age_value = age_field
714 .value_field()
715 .unwrap()
716 .as_any()
717 .downcast_ref::<BinaryViewArray>()
718 .unwrap();
719 let age_typed_value = age_field
720 .typed_value_field()
721 .unwrap()
722 .as_any()
723 .downcast_ref::<Int64Array>()
724 .unwrap();
725
726 struct ShreddedValue<'m, 'v, T> {
728 value: Option<Variant<'m, 'v>>,
729 typed_value: Option<T>,
730 }
731 struct ShreddedStruct<'m, 'v> {
732 score: ShreddedValue<'m, 'v, f64>,
733 age: ShreddedValue<'m, 'v, i64>,
734 }
735 fn get_value<'m, 'v>(
736 i: usize,
737 metadata: &'m BinaryViewArray,
738 value: &'v BinaryViewArray,
739 ) -> Variant<'m, 'v> {
740 Variant::new(metadata.value(i), value.value(i))
741 }
742 let expect = |i, expected_result: Option<ShreddedValue<ShreddedStruct>>| {
743 match expected_result {
744 Some(ShreddedValue {
745 value: expected_value,
746 typed_value: expected_typed_value,
747 }) => {
748 assert!(result.is_valid(i));
749 match expected_value {
750 Some(expected_value) => {
751 assert!(value.is_valid(i));
752 assert_eq!(expected_value, get_value(i, metadata, value));
753 }
754 None => {
755 assert!(value.is_null(i));
756 }
757 }
758 match expected_typed_value {
759 Some(ShreddedStruct {
760 score: expected_score,
761 age: expected_age,
762 }) => {
763 assert!(typed_value.is_valid(i));
764 assert!(score_field.is_valid(i)); assert!(age_field.is_valid(i)); match expected_score.value {
767 Some(expected_score_value) => {
768 assert!(score_value.is_valid(i));
769 assert_eq!(
770 expected_score_value,
771 get_value(i, metadata, score_value)
772 );
773 }
774 None => {
775 assert!(score_value.is_null(i));
776 }
777 }
778 match expected_score.typed_value {
779 Some(expected_score) => {
780 assert!(score_typed_value.is_valid(i));
781 assert_eq!(expected_score, score_typed_value.value(i));
782 }
783 None => {
784 assert!(score_typed_value.is_null(i));
785 }
786 }
787 match expected_age.value {
788 Some(expected_age_value) => {
789 assert!(age_value.is_valid(i));
790 assert_eq!(
791 expected_age_value,
792 get_value(i, metadata, age_value)
793 );
794 }
795 None => {
796 assert!(age_value.is_null(i));
797 }
798 }
799 match expected_age.typed_value {
800 Some(expected_age) => {
801 assert!(age_typed_value.is_valid(i));
802 assert_eq!(expected_age, age_typed_value.value(i));
803 }
804 None => {
805 assert!(age_typed_value.is_null(i));
806 }
807 }
808 }
809 None => {
810 assert!(typed_value.is_null(i));
811 }
812 }
813 }
814 None => {
815 assert!(result.is_null(i));
816 }
817 };
818 };
819
820 expect(
822 0,
823 Some(ShreddedValue {
824 value: None,
825 typed_value: Some(ShreddedStruct {
826 score: ShreddedValue {
827 value: None,
828 typed_value: Some(95.5),
829 },
830 age: ShreddedValue {
831 value: None,
832 typed_value: Some(30),
833 },
834 }),
835 }),
836 );
837
838 let mut builder = VariantBuilder::new();
840 builder
841 .new_object()
842 .with_field("email", "bob@example.com")
843 .finish();
844 let (m, v) = builder.finish();
845 let expected_value = Variant::new(&m, &v);
846
847 expect(
848 1,
849 Some(ShreddedValue {
850 value: Some(expected_value),
851 typed_value: Some(ShreddedStruct {
852 score: ShreddedValue {
853 value: None,
854 typed_value: Some(87.2),
855 },
856 age: ShreddedValue {
857 value: None,
858 typed_value: Some(25),
859 },
860 }),
861 }),
862 );
863
864 expect(
866 2,
867 Some(ShreddedValue {
868 value: None,
869 typed_value: Some(ShreddedStruct {
870 score: ShreddedValue {
871 value: None,
872 typed_value: None,
873 },
874 age: ShreddedValue {
875 value: None,
876 typed_value: Some(35),
877 },
878 }),
879 }),
880 );
881
882 expect(
884 3,
885 Some(ShreddedValue {
886 value: None,
887 typed_value: Some(ShreddedStruct {
888 score: ShreddedValue {
889 value: Some(Variant::from("ninety-five")),
890 typed_value: None,
891 },
892 age: ShreddedValue {
893 value: Some(Variant::from("thirty")),
894 typed_value: None,
895 },
896 }),
897 }),
898 );
899
900 expect(
902 4,
903 Some(ShreddedValue {
904 value: Some(Variant::from("not an object")),
905 typed_value: None,
906 }),
907 );
908
909 expect(
911 5,
912 Some(ShreddedValue {
913 value: None,
914 typed_value: Some(ShreddedStruct {
915 score: ShreddedValue {
916 value: None,
917 typed_value: None,
918 },
919 age: ShreddedValue {
920 value: None,
921 typed_value: None,
922 },
923 }),
924 }),
925 );
926
927 expect(6, None);
929
930 let object_with_foo_field = |i| {
932 use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
933 let metadata = VariantMetadata::new(metadata.value(i));
934 let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
935 let mut value_builder = ValueBuilder::new();
936 let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
937 ObjectBuilder::new(state, false)
938 .with_field("foo", 10)
939 .finish();
940 (metadata, value_builder.into_inner())
941 };
942
943 let (m, v) = object_with_foo_field(7);
945 expect(
946 7,
947 Some(ShreddedValue {
948 value: Some(Variant::new_with_metadata(m, &v)),
949 typed_value: Some(ShreddedStruct {
950 score: ShreddedValue {
951 value: None,
952 typed_value: None,
953 },
954 age: ShreddedValue {
955 value: None,
956 typed_value: None,
957 },
958 }),
959 }),
960 );
961
962 let (m, v) = object_with_foo_field(8);
964 expect(
965 8,
966 Some(ShreddedValue {
967 value: Some(Variant::new_with_metadata(m, &v)),
968 typed_value: Some(ShreddedStruct {
969 score: ShreddedValue {
970 value: None,
971 typed_value: Some(66.67),
972 },
973 age: ShreddedValue {
974 value: None,
975 typed_value: None,
976 },
977 }),
978 }),
979 );
980 }
981
982 #[test]
983 fn test_object_different_schemas() {
984 let mut builder = VariantArrayBuilder::new(1);
986 builder
987 .new_object()
988 .with_field("id", 123i32)
989 .with_field("age", 25i64)
990 .with_field("score", 95.5f64)
991 .finish();
992 let input = builder.build();
993
994 let schema1 = DataType::Struct(Fields::from(vec![Field::new("id", DataType::Int32, true)]));
996 let result1 = shred_variant(&input, &schema1).unwrap();
997 let value_field1 = result1.value_field().unwrap();
998 assert!(!value_field1.is_null(0)); let schema2 = DataType::Struct(Fields::from(vec![
1002 Field::new("id", DataType::Int32, true),
1003 Field::new("age", DataType::Int64, true),
1004 ]));
1005 let result2 = shred_variant(&input, &schema2).unwrap();
1006 let value_field2 = result2.value_field().unwrap();
1007 assert!(!value_field2.is_null(0)); let schema3 = DataType::Struct(Fields::from(vec![
1011 Field::new("id", DataType::Int32, true),
1012 Field::new("age", DataType::Int64, true),
1013 Field::new("score", DataType::Float64, true),
1014 ]));
1015 let result3 = shred_variant(&input, &schema3).unwrap();
1016 let value_field3 = result3.value_field().unwrap();
1017 assert!(value_field3.is_null(0)); }
1019
1020 #[test]
1021 fn test_uuid_shredding_in_objects() {
1022 let mock_uuid_1 = Uuid::new_v4();
1023 let mock_uuid_2 = Uuid::new_v4();
1024 let mock_uuid_3 = Uuid::new_v4();
1025
1026 let mut builder = VariantArrayBuilder::new(6);
1027
1028 builder
1030 .new_object()
1031 .with_field("id", mock_uuid_1)
1032 .with_field("session_id", mock_uuid_2)
1033 .finish();
1034
1035 builder
1037 .new_object()
1038 .with_field("id", mock_uuid_2)
1039 .with_field("session_id", mock_uuid_3)
1040 .with_field("name", "test_user")
1041 .finish();
1042
1043 builder.new_object().with_field("id", mock_uuid_1).finish();
1045
1046 builder
1048 .new_object()
1049 .with_field("id", mock_uuid_3)
1050 .with_field("session_id", "not-a-uuid")
1051 .finish();
1052
1053 builder
1055 .new_object()
1056 .with_field("id", 12345i64)
1057 .with_field("session_id", mock_uuid_1)
1058 .finish();
1059
1060 builder.append_null();
1062
1063 let input = builder.build();
1064
1065 let fields = Fields::from(vec![
1066 Field::new("id", DataType::FixedSizeBinary(16), true),
1067 Field::new("session_id", DataType::FixedSizeBinary(16), true),
1068 ]);
1069 let target_schema = DataType::Struct(fields);
1070
1071 let result = shred_variant(&input, &target_schema).unwrap();
1072
1073 assert!(result.value_field().is_some());
1074 assert!(result.typed_value_field().is_some());
1075 assert_eq!(result.len(), 6);
1076
1077 let metadata = result.metadata_field();
1078 let value = result.value_field().unwrap();
1079 let typed_value = result
1080 .typed_value_field()
1081 .unwrap()
1082 .as_any()
1083 .downcast_ref::<arrow::array::StructArray>()
1084 .unwrap();
1085
1086 let id_field =
1088 ShreddedVariantFieldArray::try_new(typed_value.column_by_name("id").unwrap()).unwrap();
1089 let session_id_field =
1090 ShreddedVariantFieldArray::try_new(typed_value.column_by_name("session_id").unwrap())
1091 .unwrap();
1092
1093 let id_value = id_field
1094 .value_field()
1095 .unwrap()
1096 .as_any()
1097 .downcast_ref::<BinaryViewArray>()
1098 .unwrap();
1099 let id_typed_value = id_field
1100 .typed_value_field()
1101 .unwrap()
1102 .as_any()
1103 .downcast_ref::<FixedSizeBinaryArray>()
1104 .unwrap();
1105 let session_id_value = session_id_field
1106 .value_field()
1107 .unwrap()
1108 .as_any()
1109 .downcast_ref::<BinaryViewArray>()
1110 .unwrap();
1111 let session_id_typed_value = session_id_field
1112 .typed_value_field()
1113 .unwrap()
1114 .as_any()
1115 .downcast_ref::<FixedSizeBinaryArray>()
1116 .unwrap();
1117
1118 assert!(result.is_valid(0));
1120
1121 assert!(value.is_null(0)); assert!(id_value.is_null(0));
1123 assert!(session_id_value.is_null(0));
1124
1125 assert!(typed_value.is_valid(0));
1126 assert!(id_typed_value.is_valid(0));
1127 assert!(session_id_typed_value.is_valid(0));
1128
1129 assert_eq!(id_typed_value.value(0), mock_uuid_1.as_bytes());
1130 assert_eq!(session_id_typed_value.value(0), mock_uuid_2.as_bytes());
1131
1132 assert!(result.is_valid(1));
1134
1135 assert!(value.is_valid(1)); assert!(typed_value.is_valid(1));
1137
1138 assert!(id_value.is_null(1));
1139 assert!(id_typed_value.is_valid(1));
1140 assert_eq!(id_typed_value.value(1), mock_uuid_2.as_bytes());
1141
1142 assert!(session_id_value.is_null(1));
1143 assert!(session_id_typed_value.is_valid(1));
1144 assert_eq!(session_id_typed_value.value(1), mock_uuid_3.as_bytes());
1145
1146 let row_1_variant = Variant::new(metadata.value(1), value.value(1));
1148 let Variant::Object(obj) = row_1_variant else {
1149 panic!("Expected object");
1150 };
1151
1152 assert_eq!(obj.get("name"), Some(Variant::from("test_user")));
1153
1154 assert!(result.is_valid(2));
1156
1157 assert!(value.is_null(2)); assert!(typed_value.is_valid(2));
1159
1160 assert!(id_value.is_null(2));
1161 assert!(id_typed_value.is_valid(2));
1162 assert_eq!(id_typed_value.value(2), mock_uuid_1.as_bytes());
1163
1164 assert!(session_id_value.is_null(2));
1165 assert!(session_id_typed_value.is_null(2)); assert!(result.is_valid(3));
1169
1170 assert!(value.is_null(3)); assert!(typed_value.is_valid(3));
1172
1173 assert!(id_value.is_null(3));
1174 assert!(id_typed_value.is_valid(3));
1175 assert_eq!(id_typed_value.value(3), mock_uuid_3.as_bytes());
1176
1177 assert!(session_id_value.is_valid(3)); assert!(session_id_typed_value.is_null(3));
1179 let session_id_variant = Variant::new(metadata.value(3), session_id_value.value(3));
1180 assert_eq!(session_id_variant, Variant::from("not-a-uuid"));
1181
1182 assert!(result.is_valid(4));
1184
1185 assert!(value.is_null(4)); assert!(typed_value.is_valid(4));
1187
1188 assert!(id_value.is_valid(4)); assert!(id_typed_value.is_null(4));
1190 let id_variant = Variant::new(metadata.value(4), id_value.value(4));
1191 assert_eq!(id_variant, Variant::from(12345i64));
1192
1193 assert!(session_id_value.is_null(4));
1194 assert!(session_id_typed_value.is_valid(4));
1195 assert_eq!(session_id_typed_value.value(4), mock_uuid_1.as_bytes());
1196
1197 assert!(result.is_null(5));
1199 }
1200
1201 #[test]
1202 fn test_spec_compliance() {
1203 let input = VariantArray::from_iter(vec![Variant::from(42i64), Variant::from("hello")]);
1204
1205 let result = shred_variant(&input, &DataType::Int64).unwrap();
1206
1207 let inner_struct = result.inner();
1209 assert!(inner_struct.column_by_name("metadata").is_some());
1210 assert!(inner_struct.column_by_name("value").is_some());
1211 assert!(inner_struct.column_by_name("typed_value").is_some());
1212
1213 assert_eq!(result.metadata_field().len(), input.metadata_field().len());
1215 assert_eq!(result.metadata_field().len(), input.metadata_field().len());
1218
1219 assert_eq!(result.len(), input.len());
1221 assert!(result.value_field().is_some());
1222 assert!(result.typed_value_field().is_some());
1223
1224 let value_field = result.value_field().unwrap();
1227 let typed_value_field = result
1228 .typed_value_field()
1229 .unwrap()
1230 .as_any()
1231 .downcast_ref::<Int64Array>()
1232 .unwrap();
1233
1234 for i in 0..result.len() {
1235 if !result.is_null(i) {
1236 let value_is_null = value_field.is_null(i);
1237 let typed_value_is_null = typed_value_field.is_null(i);
1238 assert!(
1240 value_is_null || typed_value_is_null,
1241 "Row {}: both value and typed_value are non-null for primitive shredding",
1242 i
1243 );
1244 }
1245 }
1246 }
1247}