1use arrow::{
18 array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
19 compute::CastOptions,
20 datatypes::Field,
21 error::Result,
22};
23use arrow_schema::{ArrowError, DataType, FieldRef};
24use parquet_variant::{VariantPath, VariantPathElement};
25
26use crate::VariantArray;
27use crate::variant_array::BorrowedShreddingState;
28use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
29
30use arrow::array::AsArray;
31use std::sync::Arc;
32
/// Outcome of attempting a single path step through the shredded part of a variant column.
pub(crate) enum ShreddedPathStep<'a> {
    /// The step succeeded; carries the shredding state of the field that was stepped into.
    Success(BorrowedShreddingState<'a>),
    /// The path element could not be followed through `typed_value` and there is no `value`
    /// column to fall back on.
    Missing,
    /// The path element could not be followed through `typed_value`, but a `value` column is
    /// present and has to be consulted instead.
    NotShredded,
}
44
45pub(crate) fn follow_shredded_path_element<'a>(
51 shredding_state: &BorrowedShreddingState<'a>,
52 path_element: &VariantPathElement<'_>,
53 cast_options: &CastOptions,
54) -> Result<ShreddedPathStep<'a>> {
55 let missing_path_step = || match shredding_state.value_field() {
58 Some(_) => ShreddedPathStep::NotShredded,
59 None => ShreddedPathStep::Missing,
60 };
61
62 let Some(typed_value) = shredding_state.typed_value_field() else {
63 return Ok(missing_path_step());
64 };
65
66 match path_element {
67 VariantPathElement::Field { name } => {
68 let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
71 if !cast_options.safe {
73 return Err(ArrowError::CastError(format!(
74 "Cannot access field '{}' on non-struct type: {}",
75 name,
76 typed_value.data_type()
77 )));
78 }
79 return Ok(missing_path_step());
81 };
82
83 let Some(field) = struct_array.column_by_name(name) else {
85 return Ok(missing_path_step());
87 };
88
89 let struct_array = field.as_struct_opt().ok_or_else(|| {
90 ArrowError::InvalidArgumentError(format!(
94 "Expected Struct array while following path, got {}",
95 field.data_type(),
96 ))
97 })?;
98
99 let state = BorrowedShreddingState::try_from(struct_array)?;
100 Ok(ShreddedPathStep::Success(state))
101 }
102 VariantPathElement::Index { .. } => {
103 Err(ArrowError::NotYetImplemented(
106 "Pathing into shredded variant array index".into(),
107 ))
108 }
109 }
110}
111
112fn shredded_get_path(
116 input: &VariantArray,
117 path: &[VariantPathElement<'_>],
118 as_field: Option<&Field>,
119 cast_options: &CastOptions,
120) -> Result<ArrayRef> {
121 let make_target_variant =
124 |value: Option<BinaryViewArray>,
125 typed_value: Option<ArrayRef>,
126 accumulated_nulls: Option<arrow::buffer::NullBuffer>| {
127 let metadata = input.metadata_field().clone();
128 VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls)
129 };
130
131 let shred_basic_variant =
133 |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
134 let as_type = as_field.map(|f| f.data_type());
135 let mut builder = make_variant_to_arrow_row_builder(
136 target.metadata_field(),
137 path,
138 as_type,
139 cast_options,
140 target.len(),
141 )?;
142 for i in 0..target.len() {
143 if target.is_null(i) {
144 builder.append_null()?;
145 } else {
146 builder.append_value(target.value(i))?;
147 }
148 }
149 builder.finish()
150 };
151
152 let mut shredding_state = input.shredding_state().borrow();
155 let mut accumulated_nulls = input.inner().nulls().cloned();
156 let mut path_index = 0;
157 for path_element in path {
158 match follow_shredded_path_element(&shredding_state, path_element, cast_options)? {
159 ShreddedPathStep::Success(state) => {
160 if let Some(typed_value) = shredding_state.typed_value_field() {
162 accumulated_nulls = arrow::buffer::NullBuffer::union(
163 accumulated_nulls.as_ref(),
164 typed_value.nulls(),
165 );
166 }
167 shredding_state = state;
168 path_index += 1;
169 continue;
170 }
171 ShreddedPathStep::Missing => {
172 let num_rows = input.len();
173 let arr = match as_field.map(|f| f.data_type()) {
174 Some(data_type) => Arc::new(array::new_null_array(data_type, num_rows)) as _,
175 None => Arc::new(array::NullArray::new(num_rows)) as _,
176 };
177 return Ok(arr);
178 }
179 ShreddedPathStep::NotShredded => {
180 let target = make_target_variant(
181 shredding_state.value_field().cloned(),
182 None,
183 accumulated_nulls,
184 );
185 return shred_basic_variant(target, path[path_index..].into(), as_field);
186 }
187 };
188 }
189
190 let target = make_target_variant(
192 shredding_state.value_field().cloned(),
193 shredding_state.typed_value_field().cloned(),
194 accumulated_nulls,
195 );
196
197 let Some(as_field) = as_field else {
199 return Ok(ArrayRef::from(target));
200 };
201
202 if let DataType::Struct(fields) = as_field.data_type() {
205 let children = fields
206 .iter()
207 .map(|field| {
208 shredded_get_path(
209 &target,
210 &[VariantPathElement::from(field.name().as_str())],
211 Some(field),
212 cast_options,
213 )
214 })
215 .collect::<Result<Vec<_>>>()?;
216
217 let struct_nulls = target.nulls().cloned();
218
219 return Ok(Arc::new(StructArray::try_new(
220 fields.clone(),
221 children,
222 struct_nulls,
223 )?));
224 }
225
226 shred_basic_variant(target, VariantPath::default(), Some(as_field))
228}
229
230pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
242 let variant_array = VariantArray::try_new(input)?;
243
244 let GetOptions {
245 as_type,
246 path,
247 cast_options,
248 } = options;
249
250 shredded_get_path(&variant_array, &path, as_type.as_deref(), &cast_options)
251}
252
/// Options controlling [`variant_get`].
#[derive(Debug, Clone, Default)]
pub struct GetOptions<'a> {
    /// Path to follow inside each variant value; the default (empty) path selects the value
    /// itself.
    pub path: VariantPath<'a>,
    /// Requested output field. `None` asks for the result as a variant array; `Some(field)`
    /// asks for the values converted to `field`'s data type.
    pub as_type: Option<FieldRef>,
    /// Cast behaviour used during path traversal and conversion (for example, whether a
    /// failed conversion is an error or yields null).
    pub cast_options: CastOptions<'a>,
}
265
266impl<'a> GetOptions<'a> {
267 pub fn new() -> Self {
269 Default::default()
270 }
271
272 pub fn new_with_path(path: VariantPath<'a>) -> Self {
274 Self {
275 path,
276 as_type: None,
277 cast_options: Default::default(),
278 }
279 }
280
281 pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
283 self.as_type = as_type;
284 self
285 }
286
287 pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
289 self.cast_options = cast_options;
290 self
291 }
292}
293
294#[cfg(test)]
295mod test {
296 use std::sync::Arc;
297
298 use super::{GetOptions, variant_get};
299 use crate::VariantArray;
300 use crate::json_to_variant;
301 use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
302 use arrow::array::{
303 Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Float32Array, Float64Array,
304 Int8Array, Int16Array, Int32Array, Int64Array, StringArray, StructArray,
305 };
306 use arrow::buffer::NullBuffer;
307 use arrow::compute::CastOptions;
308 use arrow::datatypes::DataType::{Int16, Int32, Int64};
309 use arrow_schema::{DataType, Field, FieldRef, Fields};
310 use chrono::DateTime;
311 use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, Variant, VariantPath};
312
313 fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
314 let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
316 let input_variant_array_ref = ArrayRef::from(json_to_variant(&input_array_ref).unwrap());
317
318 let result =
319 variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
320
321 let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
323 let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();
324
325 let result_array = VariantArray::try_new(&result).unwrap();
326 assert_eq!(
327 result_array.len(),
328 1,
329 "Expected result array to have length 1"
330 );
331 assert!(
332 result_array.nulls().is_none(),
333 "Expected no nulls in result array"
334 );
335 let result_variant = result_array.value(0);
336 let expected_variant = expected_variant_array.value(0);
337 assert_eq!(
338 result_variant, expected_variant,
339 "Result variant does not match expected variant"
340 );
341 }
342
/// A top-level object field can be extracted from a variant parsed from JSON.
#[test]
fn get_primitive_variant_field() {
    single_variant_get_test(
        r#"{"some_field": 1234}"#,
        VariantPath::from("some_field"),
        "1234",
    );
}
351
/// A list element can be extracted by index from a variant parsed from JSON.
#[test]
fn get_primitive_variant_list_index() {
    single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
}
356
/// A two-element field path reaches a primitive nested inside an object of an object.
#[test]
fn get_primitive_variant_inside_object_of_object() {
    single_variant_get_test(
        r#"{"top_level_field": {"inner_field": 1234}}"#,
        VariantPath::from("top_level_field").join("inner_field"),
        "1234",
    );
}
365
/// An index followed by a field name reaches a primitive inside an object stored in a list.
#[test]
fn get_primitive_variant_inside_list_of_object() {
    single_variant_get_test(
        r#"[{"some_field": 1234}]"#,
        VariantPath::from(0).join("some_field"),
        "1234",
    );
}
374
/// A field name followed by an index reaches a primitive inside a list stored in an object.
#[test]
fn get_primitive_variant_inside_object_of_list() {
    single_variant_get_test(
        r#"{"some_field": [1234]}"#,
        VariantPath::from("some_field").join(0),
        "1234",
    );
}
383
/// Extracting a field whose value is itself an object returns that whole nested object.
#[test]
fn get_complex_variant() {
    single_variant_get_test(
        r#"{"top_level_field": {"inner_field": 1234}}"#,
        VariantPath::from("top_level_field"),
        r#"{"inner_field": 1234}"#,
    );
}
392
/// Test body for partially shredded numeric columns: calls `$data_fn()`, runs `variant_get`
/// with default options, and checks the four rows are `34`, null, `"n/a"` and `100`, with the
/// numbers expressed as `$primitive_type`.
macro_rules! numeric_partially_shredded_test {
    ($primitive_type:ty, $data_fn:ident) => {
        let result = variant_get(&$data_fn(), GetOptions::new()).unwrap();
        let result = VariantArray::try_new(&result).unwrap();
        assert_eq!(result.len(), 4);

        // Builds the expected variant for a shredded numeric row.
        let number = |raw: u8| Variant::from(<$primitive_type>::try_from(raw).unwrap());

        assert_eq!(result.value(0), number(34u8));
        assert!(!result.is_valid(1));
        assert_eq!(result.value(2), Variant::from("n/a"));
        assert_eq!(result.value(3), number(100u8));
    };
}
417
/// Defines `fn $func_name() -> ArrayRef` returning a four-row, partially shredded variant
/// array whose `typed_value` column is produced by calling `$typed_value_array_gen`.
///
/// The generated struct has `metadata`, `typed_value` and `value` fields plus a top-level
/// null buffer that marks row 1 as null. The `value` column holds the serialized string
/// `"n/a"` in row 2, an empty byte string in row 1 and nulls in rows 0 and 3.
macro_rules! partially_shredded_variant_array_gen {
    ($func_name:ident, $typed_value_array_gen: expr) => {
        fn $func_name() -> ArrayRef {
            // Serialize the string "n/a" once; its metadata is shared by every row.
            let (metadata, string_value) = {
                let mut builder = parquet_variant::VariantBuilder::new();
                builder.append_value("n/a");
                builder.finish()
            };

            // Top-level validity: only row 1 is null.
            let nulls = NullBuffer::from(vec![
                true,
                false,
                true,
                true,
            ]);

            let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));

            // `value` column: populated with the serialized string only in row 2.
            let values = BinaryViewArray::from(vec![
                None,
                Some(b"" as &[u8]),
                Some(&string_value),
                None,
            ]);

            let typed_value = $typed_value_array_gen();

            let struct_array = StructArrayBuilder::new()
                .with_field("metadata", Arc::new(metadata), false)
                .with_field("typed_value", Arc::new(typed_value), true)
                .with_field("value", Arc::new(values), true)
                .with_nulls(nulls)
                .build();
            ArrayRef::from(
                VariantArray::try_new(&struct_array).expect("should create variant array"),
            )
        }
    };
}
460
/// Partially shredded `Int8` column read back as variants.
#[test]
fn get_variant_partially_shredded_int8_as_variant() {
    numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
}
465
/// Partially shredded `Int16` column read back as variants.
#[test]
fn get_variant_partially_shredded_int16_as_variant() {
    numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
}
470
/// Partially shredded `Int32` column read back as variants.
#[test]
fn get_variant_partially_shredded_int32_as_variant() {
    numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
}
475
/// Partially shredded `Int64` column read back as variants.
#[test]
fn get_variant_partially_shredded_int64_as_variant() {
    numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
}
480
/// Partially shredded `Float32` column read back as variants.
#[test]
fn get_variant_partially_shredded_float32_as_variant() {
    numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
}
485
/// Partially shredded `Float64` column read back as variants.
#[test]
fn get_variant_partially_shredded_float64_as_variant() {
    numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
}
490
/// Partially shredded boolean column read back as variants: shredded rows come from
/// `typed_value`, row 1 is null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partially_shredded_bool_as_variant() {
    let result = variant_get(&partially_shredded_bool_variant_array(), GetOptions::new()).unwrap();
    let result = VariantArray::try_new(&result).unwrap();
    assert_eq!(result.len(), 4);

    let expected_rows = [
        Some(Variant::from(true)),
        None,
        Some(Variant::from("n/a")),
        Some(Variant::from(false)),
    ];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(variant) => assert_eq!(result.value(row), variant),
            None => assert!(!result.is_valid(row)),
        }
    }
}
507
/// Partially shredded string column read back as variants: shredded rows come from
/// `typed_value`, row 1 is null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partially_shredded_utf8_as_variant() {
    let result = variant_get(&partially_shredded_utf8_variant_array(), GetOptions::new()).unwrap();
    let result = VariantArray::try_new(&result).unwrap();
    assert_eq!(result.len(), 4);

    let expected_rows = [Some("hello"), None, Some("n/a"), Some("world")];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(text) => assert_eq!(result.value(row), Variant::from(text)),
            None => assert!(!result.is_valid(row)),
        }
    }
}
524
// Partially shredded fixture whose `typed_value` is a BinaryViewArray populated in rows 0
// and 3 only.
partially_shredded_variant_array_gen!(partially_shredded_binary_view_variant_array, || {
    BinaryViewArray::from(vec![
        Some(&[1u8, 2u8, 3u8][..]),
        None,
        None,
        Some(&[4u8, 5u8, 6u8][..]),
    ])
});
533
/// Partially shredded `Date32` column read back as variants: shredded rows become dates,
/// row 1 is null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partially_shredded_date32_as_variant() {
    use chrono::NaiveDate;

    let result =
        variant_get(&partially_shredded_date32_variant_array(), GetOptions::new()).unwrap();
    let result = VariantArray::try_new(&result).unwrap();
    assert_eq!(result.len(), 4);

    let expected_rows = [
        Some(Variant::from(NaiveDate::from_ymd_opt(2025, 9, 17).unwrap())),
        None,
        Some(Variant::from("n/a")),
        Some(Variant::from(NaiveDate::from_ymd_opt(2025, 9, 9).unwrap())),
    ];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(variant) => assert_eq!(result.value(row), variant),
            None => assert!(!result.is_valid(row)),
        }
    }
}
553
/// Partially shredded binary column read back as variants: shredded rows become binary
/// variants, row 1 is null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partially_shredded_binary_view_as_variant() {
    let result = variant_get(
        &partially_shredded_binary_view_variant_array(),
        GetOptions::new(),
    )
    .unwrap();
    let result = VariantArray::try_new(&result).unwrap();
    assert_eq!(result.len(), 4);

    let expected_rows = [
        Some(Variant::from(&[1u8, 2u8, 3u8][..])),
        None,
        Some(Variant::from("n/a")),
        Some(Variant::from(&[4u8, 5u8, 6u8][..])),
    ];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(variant) => assert_eq!(result.value(row), variant),
            None => assert!(!result.is_valid(row)),
        }
    }
}
570
/// With default cast options, requesting `Int32` from a partially shredded `Int32` column
/// yields null for the row whose value is the string `"n/a"`.
#[test]
fn get_variant_shredded_int32_as_int32_safe_cast() {
    let target = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
    let options = GetOptions::new().with_as_type(Some(target));

    let result = variant_get(&partially_shredded_int32_variant_array(), options).unwrap();

    let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(34), None, None, Some(100)]));
    assert_eq!(&result, &expected)
}
588
/// With `safe: false`, requesting `Int32` from a partially shredded `Int32` column fails on
/// the row whose value is the string `"n/a"`.
#[test]
fn get_variant_shredded_int32_as_int32_unsafe_cast() {
    let target = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
    let strict = CastOptions {
        safe: false,
        ..Default::default()
    };
    let options = GetOptions::new()
        .with_as_type(Some(target))
        .with_cast_options(strict);

    let message = variant_get(&partially_shredded_int32_variant_array(), options)
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
    );
}
610
/// Test body for perfectly shredded numeric columns: calls `$data_fn()`, runs `variant_get`
/// with default options, and checks the three rows are `1`, `2` and `3` expressed as
/// `$primitive_type`.
macro_rules! numeric_perfectly_shredded_test {
    ($primitive_type:ty, $data_fn:ident) => {
        let result = variant_get(&$data_fn(), GetOptions::new()).unwrap();
        let result = VariantArray::try_new(&result).unwrap();
        assert_eq!(result.len(), 3);

        for (row, raw) in [1u8, 2u8, 3u8].into_iter().enumerate() {
            assert_eq!(
                result.value(row),
                Variant::from(<$primitive_type>::try_from(raw).unwrap())
            );
        }
    };
}
637
/// Perfectly shredded `Int8` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_int8_as_variant() {
    numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
}
642
/// Perfectly shredded `Int16` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_int16_as_variant() {
    numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
}
647
/// Perfectly shredded `Int32` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_int32_as_variant() {
    numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
}
652
/// Perfectly shredded `Int64` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_int64_as_variant() {
    numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
}
657
/// Perfectly shredded `Float32` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_float32_as_variant() {
    numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
}
662
/// Perfectly shredded `Float64` column read back as variants.
#[test]
fn get_variant_perfectly_shredded_float64_as_variant() {
    numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
}
667
/// An all-null variant array read back with default options stays all-null.
#[test]
fn get_variant_all_null_as_variant() {
    let result = variant_get(&all_null_variant_array(), GetOptions::new()).unwrap();
    let result = VariantArray::try_new(&result).unwrap();
    assert_eq!(result.len(), 3);

    for row in 0..3 {
        assert!(!result.is_valid(row));
    }
}
684
/// An all-null variant array requested as `Int32` yields an all-null `Int32Array`.
#[test]
fn get_variant_all_null_as_int32() {
    let target = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
    let options = GetOptions::new().with_as_type(Some(target));

    let result = variant_get(&all_null_variant_array(), options).unwrap();

    let expected: ArrayRef = Arc::new(Int32Array::from(vec![Option::<i32>::None; 3]));
    assert_eq!(&result, &expected)
}
701
/// Defines a `#[test]` named `$name` that requests `$primitive_type` from the array returned
/// by `$perfectly_shredded_array_gen_fun()` and compares the result to `$expected_array`.
macro_rules! perfectly_shredded_to_arrow_primitive_test {
    ($name:ident, $primitive_type:ident, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
        #[test]
        fn $name() {
            let target = FieldRef::from(Field::new("typed_value", $primitive_type, true));
            let options = GetOptions::new().with_as_type(Some(target));

            let actual = variant_get(&$perfectly_shredded_array_gen_fun(), options).unwrap();

            let wanted: ArrayRef = Arc::new($expected_array);
            assert_eq!(&actual, &wanted);
        }
    };
}
715
// Perfectly shredded Int16 requested as Int16.
perfectly_shredded_to_arrow_primitive_test!(
    get_variant_perfectly_shredded_int16_as_int16,
    Int16,
    perfectly_shredded_int16_variant_array,
    Int16Array::from(vec![Some(1), Some(2), Some(3)])
);

// Perfectly shredded Int32 requested as Int32.
perfectly_shredded_to_arrow_primitive_test!(
    get_variant_perfectly_shredded_int32_as_int32,
    Int32,
    perfectly_shredded_int32_variant_array,
    Int32Array::from(vec![Some(1), Some(2), Some(3)])
);

// Perfectly shredded Int64 requested as Int64.
perfectly_shredded_to_arrow_primitive_test!(
    get_variant_perfectly_shredded_int64_as_int64,
    Int64,
    perfectly_shredded_int64_variant_array,
    Int64Array::from(vec![Some(1), Some(2), Some(3)])
);
736
/// Defines `fn $func() -> ArrayRef` returning a three-row variant array that has only
/// `metadata` and `typed_value` fields (no `value` column), with `typed_value` holding
/// `1`, `2`, `3` as `$primitive_type` in an `$array_type`.
macro_rules! numeric_perfectly_shredded_variant_array_fn {
    ($func:ident, $array_type:ident, $primitive_type:ty) => {
        fn $func() -> ArrayRef {
            // Converts a small literal into a non-null entry of the target primitive type.
            let entry = |raw: u8| Some(<$primitive_type>::try_from(raw).unwrap());
            let typed_value = $array_type::from(vec![entry(1u8), entry(2u8), entry(3u8)]);

            // Every row shares the same empty variant metadata.
            let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
                EMPTY_VARIANT_METADATA_BYTES,
                3,
            ));

            let struct_array = StructArrayBuilder::new()
                .with_field("metadata", Arc::new(metadata), false)
                .with_field("typed_value", Arc::new(typed_value), true)
                .build();

            let variant_array =
                VariantArray::try_new(&struct_array).expect("should create variant array");
            variant_array.into()
        }
    };
}
774
// Perfectly shredded fixtures, one per numeric type exercised by the tests in this module.
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_int8_variant_array,
    Int8Array,
    i8
);
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_int16_variant_array,
    Int16Array,
    i16
);
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_int32_variant_array,
    Int32Array,
    i32
);
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_int64_variant_array,
    Int64Array,
    i64
);
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_float32_variant_array,
    Float32Array,
    f32
);
numeric_perfectly_shredded_variant_array_fn!(
    perfectly_shredded_float64_variant_array,
    Float64Array,
    f64
);
805
/// Runs `variant_get` with default options on `$variant_array` and asserts that the resulting
/// variant array matches `$array_expected`, a collection of `Option<Variant>` where `None`
/// means the row must be null.
macro_rules! assert_variant_get_as_variant_array_with_default_option {
    ($variant_array: expr, $array_expected: expr) => {{
        let options = GetOptions::new();
        let array = $variant_array;
        // Bind the expectation once: expanding `$array_expected` at each use would evaluate
        // the caller's expression more than once.
        let expected = $array_expected;
        let result = variant_get(&array, options).unwrap();

        let result = VariantArray::try_new(&result).unwrap();

        assert_eq!(result.len(), expected.len());

        for (idx, item) in expected.into_iter().enumerate() {
            match item {
                Some(item) => assert_eq!(result.value(idx), item),
                None => assert!(result.is_null(idx)),
            }
        }
    }};
}
825
// Partially shredded fixture whose `typed_value` is a microsecond timestamp array with no
// timezone set, populated in rows 0 and 3 only.
partially_shredded_variant_array_gen!(
    partially_shredded_timestamp_micro_ntz_variant_array,
    || {
        arrow::array::TimestampMicrosecondArray::from(vec![
            Some(-456000),
            None,
            None,
            Some(1758602096000000),
        ])
    }
);
837
/// Microsecond timestamps without a timezone come back as naive date-time variants; row 1 is
/// null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
    let first = DateTime::from_timestamp_micros(-456000i64)
        .unwrap()
        .naive_utc();
    let last = DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
        .unwrap()
        .naive_utc();

    assert_variant_get_as_variant_array_with_default_option!(
        partially_shredded_timestamp_micro_ntz_variant_array(),
        vec![
            Some(Variant::from(first)),
            None,
            Some(Variant::from("n/a")),
            Some(Variant::from(last)),
        ]
    )
}
859
// Partially shredded fixture whose `typed_value` is a microsecond timestamp array with
// timezone "+00:00", populated in rows 0 and 3 only.
partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
    arrow::array::TimestampMicrosecondArray::from(vec![
        Some(-456000),
        None,
        None,
        Some(1758602096000000),
    ])
    .with_timezone("+00:00")
});
869
/// Microsecond timestamps with a timezone come back as UTC date-time variants; row 1 is null
/// and row 2 falls back to the `value` column.
#[test]
fn get_variant_partial_shredded_timestamp_micro_as_variant() {
    let first = DateTime::from_timestamp_micros(-456000i64)
        .unwrap()
        .to_utc();
    let last = DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
        .unwrap()
        .to_utc();

    assert_variant_get_as_variant_array_with_default_option!(
        partially_shredded_timestamp_micro_variant_array(),
        vec![
            Some(Variant::from(first)),
            None,
            Some(Variant::from("n/a")),
            Some(Variant::from(last)),
        ]
    )
}
891
// Partially shredded fixture whose `typed_value` is a nanosecond timestamp array with no
// timezone set, populated in rows 0 and 3 only.
partially_shredded_variant_array_gen!(
    partially_shredded_timestamp_nano_ntz_variant_array,
    || {
        arrow::array::TimestampNanosecondArray::from(vec![
            Some(-4999999561),
            None,
            None,
            Some(1758602096000000000),
        ])
    }
);
903
/// Nanosecond timestamps without a timezone come back as naive date-time variants; row 1 is
/// null and row 2 falls back to the `value` column.
#[test]
fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
    // -4_999_999_561 ns is 5 seconds before the epoch plus 439 ns.
    let first = DateTime::from_timestamp(-5, 439).unwrap().naive_utc();
    let last = DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
        .unwrap()
        .naive_utc();

    assert_variant_get_as_variant_array_with_default_option!(
        partially_shredded_timestamp_nano_ntz_variant_array(),
        vec![
            Some(Variant::from(first)),
            None,
            Some(Variant::from("n/a")),
            Some(Variant::from(last)),
        ]
    )
}
924
// Partially shredded fixture whose `typed_value` is a nanosecond timestamp array with
// timezone "+00:00", populated in rows 0 and 3 only.
partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
    arrow::array::TimestampNanosecondArray::from(vec![
        Some(-4999999561),
        None,
        None,
        Some(1758602096000000000),
    ])
    .with_timezone("+00:00")
});
934
/// Nanosecond timestamps with a timezone come back as UTC date-time variants; row 1 is null
/// and row 2 falls back to the `value` column.
#[test]
fn get_variant_partial_shredded_timestamp_nano_as_variant() {
    // -4_999_999_561 ns is 5 seconds before the epoch plus 439 ns.
    let first = DateTime::from_timestamp(-5, 439).unwrap().to_utc();
    let last = DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
        .unwrap()
        .to_utc();

    assert_variant_get_as_variant_array_with_default_option!(
        partially_shredded_timestamp_nano_variant_array(),
        vec![
            Some(Variant::from(first)),
            None,
            Some(Variant::from("n/a")),
            Some(Variant::from(last)),
        ]
    )
}
955
/// Defines `fn $func() -> ArrayRef` via `partially_shredded_variant_array_gen!`, using an
/// `$array_type` `typed_value` column that holds `34` in row 0, `100` in row 3 (both as
/// `$primitive_type`) and nulls in rows 1 and 2.
macro_rules! numeric_partially_shredded_variant_array_fn {
    ($func:ident, $array_type:ident, $primitive_type:ty) => {
        partially_shredded_variant_array_gen!($func, || $array_type::from(vec![
            Some(<$primitive_type>::try_from(34u8).unwrap()),
            None,
            None,
            Some(<$primitive_type>::try_from(100u8).unwrap()),
        ]));
    };
}
989
/// Defines `fn $func() -> ArrayRef` returning a four-row, partially shredded variant array
/// whose `typed_value` column is produced by calling `$typed_array_gen`.
///
/// The generated struct has `metadata`, `typed_value` and `value` fields plus a top-level
/// null buffer that marks row 1 as null. The `value` column holds the serialized string
/// `"n/a"` in row 2, an empty byte string in row 1 and nulls in rows 0 and 3.
// NOTE(review): a macro with this same name and an equivalent body is already defined earlier
// in this module; this definition shadows it for the invocations that follow. Consider
// keeping only one of the two.
macro_rules! partially_shredded_variant_array_gen {
    ($func:ident, $typed_array_gen: expr) => {
        fn $func() -> ArrayRef {
            // Serialize the string "n/a" once; its metadata is shared by every row.
            let (metadata, string_value) = {
                let mut builder = parquet_variant::VariantBuilder::new();
                builder.append_value("n/a");
                builder.finish()
            };

            // Top-level validity: only row 1 is null.
            let nulls = NullBuffer::from(vec![
                true,
                false,
                true,
                true,
            ]);

            let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));

            // `value` column: populated with the serialized string only in row 2.
            let values = BinaryViewArray::from(vec![
                None,
                Some(b"" as &[u8]),
                Some(&string_value),
                None,
            ]);

            let typed_value = $typed_array_gen();

            let struct_array = StructArrayBuilder::new()
                .with_field("metadata", Arc::new(metadata), false)
                .with_field("typed_value", Arc::new(typed_value), true)
                .with_field("value", Arc::new(values), true)
                .with_nulls(nulls)
                .build();

            ArrayRef::from(
                VariantArray::try_new(&struct_array).expect("should create variant array"),
            )
        }
    };
}
1035
// Partially shredded fixtures, one per numeric type exercised by the tests in this module.
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_int8_variant_array,
    Int8Array,
    i8
);
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_int16_variant_array,
    Int16Array,
    i16
);
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_int32_variant_array,
    Int32Array,
    i32
);
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_int64_variant_array,
    Int64Array,
    i64
);
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_float32_variant_array,
    Float32Array,
    f32
);
numeric_partially_shredded_variant_array_fn!(
    partially_shredded_float64_variant_array,
    Float64Array,
    f64
);
1066
// Partially shredded fixture with a boolean `typed_value` column (rows 0 and 3 populated).
partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || {
    arrow::array::BooleanArray::from(vec![
        Some(true),
        None,
        None,
        Some(false),
    ])
});

// Partially shredded fixture with a string `typed_value` column (rows 0 and 3 populated).
partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || {
    StringArray::from(vec![
        Some("hello"),
        None,
        None,
        Some("world"),
    ])
});

// Partially shredded fixture with a Date32 `typed_value` column (rows 0 and 3 populated);
// the values are day counts that the date32 test expects to read back as 2025-09-17 and
// 2025-09-09.
partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || {
    Date32Array::from(vec![
        Some(20348),
        None,
        None,
        Some(20340),
    ])
});
1093
1094 fn all_null_variant_array() -> ArrayRef {
1111 let nulls = NullBuffer::from(vec![
1112 false, false, false, ]);
1116
1117 let metadata =
1119 BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3));
1120
1121 let struct_array = StructArrayBuilder::new()
1122 .with_field("metadata", Arc::new(metadata), false)
1123 .with_nulls(nulls)
1124 .build();
1125
1126 Arc::new(struct_array)
1127 }
/// Reading the shredded field `x` as a variant returns the `Int32` values stored in its
/// `typed_value` column.
#[test]
fn test_shredded_object_field_access() {
    let array = shredded_object_with_x_field_variant_array();

    let result = variant_get(&array, GetOptions::new_with_path(VariantPath::from("x"))).unwrap();
    let result_variant = VariantArray::try_new(&result).unwrap();
    assert_eq!(result_variant.len(), 2);

    for (row, expected) in [1, 42].into_iter().enumerate() {
        assert_eq!(result_variant.value(row), Variant::Int32(expected));
    }
}
1147
/// Reading the shredded field `x` as `Int32` returns the shredded values as an `Int32Array`.
#[test]
fn test_shredded_object_field_as_int32() {
    let target = FieldRef::from(Field::new("x", DataType::Int32, false));
    let options = GetOptions::new_with_path(VariantPath::from("x")).with_as_type(Some(target));

    let result = variant_get(&shredded_object_with_x_field_variant_array(), options).unwrap();

    let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
    assert_eq!(&result, &expected);
}
1163
1164 fn shredded_object_with_x_field_variant_array() -> ArrayRef {
1176 let (metadata, y_field_value) = {
1178 let mut builder = parquet_variant::VariantBuilder::new();
1179 let mut obj = builder.new_object();
1180 obj.insert("x", Variant::Int32(42));
1181 obj.insert("y", Variant::from("foo"));
1182 obj.finish();
1183 builder.finish()
1184 };
1185
1186 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
1188
1189 let empty_object_value = {
1194 let mut builder = parquet_variant::VariantBuilder::new();
1195 let obj = builder.new_object();
1196 obj.finish();
1197 let (_, value) = builder.finish();
1198 value
1199 };
1200
1201 let value_array = BinaryViewArray::from(vec![
1202 Some(y_field_value.as_slice()), Some(empty_object_value.as_slice()), ]);
1205
1206 let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
1209
1210 let x_field_struct = StructArrayBuilder::new()
1212 .with_field("typed_value", Arc::new(x_field_typed_value), true)
1213 .build();
1214
1215 let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1217 .expect("should create ShreddedVariantFieldArray");
1218
1219 let typed_value_fields = Fields::from(vec![Field::new(
1221 "x",
1222 x_field_shredded.data_type().clone(),
1223 true,
1224 )]);
1225 let typed_value_struct = StructArray::try_new(
1226 typed_value_fields,
1227 vec![ArrayRef::from(x_field_shredded)],
1228 None, )
1230 .unwrap();
1231
1232 let main_struct = StructArrayBuilder::new()
1234 .with_field("metadata", Arc::new(metadata_array), false)
1235 .with_field("value", Arc::new(value_array), true)
1236 .with_field("typed_value", Arc::new(typed_value_struct), true)
1237 .build();
1238
1239 Arc::new(main_struct)
1240 }
1241
/// Prints how several path spellings are split into elements, then tries the nested path
/// `a` -> `x` against the shredded-object fixture and prints the outcome.
// NOTE(review): this test makes no assertions, so it cannot fail on either outcome of
// `variant_get`; consider asserting the expected result once it is settled.
#[test]
fn test_simple_nested_path_support() {
    println!("Testing path parsing:");

    let path_x = VariantPath::from("x");
    let elements_x: Vec<_> = path_x.iter().collect();
    println!(" 'x' -> {} elements: {:?}", elements_x.len(), elements_x);

    let path_ax = VariantPath::from("a.x");
    let elements_ax: Vec<_> = path_ax.iter().collect();
    println!(
        " 'a.x' -> {} elements: {:?}",
        elements_ax.len(),
        elements_ax
    );

    let path_ax_alt = VariantPath::from("$.a.x");
    let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect();
    println!(
        " '$.a.x' -> {} elements: {:?}",
        elements_ax_alt.len(),
        elements_ax_alt
    );

    let path_nested = VariantPath::from("a").join("x");
    let elements_nested: Vec<_> = path_nested.iter().collect();
    println!(
        " VariantPath::from('a').join('x') -> {} elements: {:?}",
        elements_nested.len(),
        elements_nested
    );

    let array = shredded_object_with_x_field_variant_array();
    let options = GetOptions::new_with_path(VariantPath::from("a").join("x"));

    let Err(e) = variant_get(&array, options) else {
        println!("Nested path 'a.x' works unexpectedly!");
        return;
    };

    println!("Nested path 'a.x' error: {}", e);
    let message = e.to_string();
    if message.contains("Not yet implemented") || message.contains("NotYetImplemented") {
        println!("This is expected - nested paths are not implemented");
        return;
    }
    println!("This shows nested paths need implementation");
}
1301
/// Extracts the top-level field `x` as `Int32` from both the unshredded and the shredded
/// depth-0 fixtures and checks the resulting arrays.
#[test]
fn test_depth_0_int32_conversion() {
    println!("=== Testing Depth 0: Direct field access ===");

    // Requests field `x` of `array` as a nullable Int32 column.
    let get_x_as_int32 = |array: &ArrayRef| -> ArrayRef {
        let field = Field::new("result", DataType::Int32, true);
        let options = GetOptions::new_with_path(VariantPath::from("x"))
            .with_as_type(Some(FieldRef::from(field)));
        variant_get(array, options).unwrap()
    };

    let unshredded_array = create_depth_0_test_data();
    let result = get_x_as_int32(&unshredded_array);
    let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(42), None, None]));
    assert_eq!(&result, &expected);
    println!("Depth 0 (unshredded) passed");

    let shredded_array = create_depth_0_shredded_test_data_simple();
    let result = get_x_as_int32(&shredded_array);
    let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(42), None]));
    assert_eq!(&result, &expected);
    println!("Depth 0 (shredded) passed");
}
1340
/// Depth 1: extracts the nested field `a.x` as `Int32`.
///
/// Both the unshredded and the shredded fixture are expected to yield
/// `[55, null]`.
#[test]
fn test_depth_1_int32_conversion() {
    println!("=== Testing Depth 1: Single nested field access ===");

    // `variant_get` takes the options by value, so build a fresh request per call.
    let extract_a_x = |input: &ArrayRef| {
        let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
        let request =
            GetOptions::new_with_path(VariantPath::from("a.x")).with_as_type(Some(as_type));
        variant_get(input, request).unwrap()
    };

    let unshredded_input = create_nested_path_test_data();
    let unshredded_output = extract_a_x(&unshredded_input);
    let unshredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(55), None]));
    assert_eq!(&unshredded_output, &unshredded_expected);
    println!("Depth 1 (unshredded) passed");

    let shredded_input = create_depth_1_shredded_test_data_working();
    let shredded_output = extract_a_x(&shredded_input);
    let shredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(55), None]));
    assert_eq!(&shredded_output, &shredded_expected);
    println!("Depth 1 (shredded) passed");
}
1377
/// Depth 2: extracts the doubly nested field `a.b.x` as `Int32`.
///
/// Both the unshredded and the shredded fixture are expected to yield
/// `[100, null, null]`.
#[test]
fn test_depth_2_int32_conversion() {
    println!("=== Testing Depth 2: Double nested field access ===");

    // `variant_get` takes the options by value, so build a fresh request per call.
    let extract_a_b_x = |input: &ArrayRef| {
        let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
        let request =
            GetOptions::new_with_path(VariantPath::from("a.b.x")).with_as_type(Some(as_type));
        variant_get(input, request).unwrap()
    };

    let unshredded_input = create_depth_2_test_data();
    let unshredded_output = extract_a_b_x(&unshredded_input);
    let unshredded_expected: ArrayRef =
        Arc::new(Int32Array::from(vec![Some(100), None, None]));
    assert_eq!(&unshredded_output, &unshredded_expected);
    println!("Depth 2 (unshredded) passed");

    let shredded_input = create_depth_2_shredded_test_data_working();
    let shredded_output = extract_a_b_x(&shredded_input);
    let shredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(100), None, None]));
    assert_eq!(&shredded_output, &shredded_expected);
    println!("Depth 2 (shredded) passed");
}
1416
/// Checks that `variant_get` succeeds (does not return an error) for both the
/// single-element path `x` and the two-element path `a` -> `x` when an
/// `Int32` output type is requested. The results are only printed, not
/// compared against expected values.
#[test]
fn test_current_nested_path_functionality() {
    let array = shredded_object_with_x_field_variant_array();
    let int32_result_field = || FieldRef::from(Field::new("result", DataType::Int32, true));

    let flat_request = GetOptions::new_with_path(VariantPath::from("x"))
        .with_as_type(Some(int32_result_field()));
    let flat = variant_get(&array, flat_request).unwrap();
    println!("Single path 'x' works - result: {:?}", flat);

    let nested_request = GetOptions::new_with_path(VariantPath::from("a").join("x"))
        .with_as_type(Some(int32_result_field()));
    let nested = variant_get(&array, nested_request).unwrap();
    println!("Nested path 'a.x' result: {:?}", nested);
}
1443
1444 fn create_depth_0_test_data() -> ArrayRef {
1447 let mut builder = crate::VariantArrayBuilder::new(3);
1448
1449 {
1451 let json_str = r#"{"x": 42}"#;
1452 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1453 if let Ok(variant_array) = json_to_variant(&string_array) {
1454 builder.append_variant(variant_array.value(0));
1455 } else {
1456 builder.append_null();
1457 }
1458 }
1459
1460 {
1462 let json_str = r#"{"x": "foo"}"#;
1463 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1464 if let Ok(variant_array) = json_to_variant(&string_array) {
1465 builder.append_variant(variant_array.value(0));
1466 } else {
1467 builder.append_null();
1468 }
1469 }
1470
1471 {
1473 let json_str = r#"{"y": 10}"#;
1474 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1475 if let Ok(variant_array) = json_to_variant(&string_array) {
1476 builder.append_variant(variant_array.value(0));
1477 } else {
1478 builder.append_null();
1479 }
1480 }
1481
1482 ArrayRef::from(builder.build())
1483 }
1484
1485 fn create_nested_path_test_data() -> ArrayRef {
1488 let mut builder = crate::VariantArrayBuilder::new(2);
1489
1490 {
1492 let json_str = r#"{"a": {"x": 55}, "b": 42}"#;
1493 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1494 if let Ok(variant_array) = json_to_variant(&string_array) {
1495 builder.append_variant(variant_array.value(0));
1496 } else {
1497 builder.append_null();
1498 }
1499 }
1500
1501 {
1503 let json_str = r#"{"a": {"x": "foo"}, "b": 42}"#;
1504 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1505 if let Ok(variant_array) = json_to_variant(&string_array) {
1506 builder.append_variant(variant_array.value(0));
1507 } else {
1508 builder.append_null();
1509 }
1510 }
1511
1512 ArrayRef::from(builder.build())
1513 }
1514
1515 fn create_depth_2_test_data() -> ArrayRef {
1518 let mut builder = crate::VariantArrayBuilder::new(3);
1519
1520 {
1522 let json_str = r#"{"a": {"b": {"x": 100}}}"#;
1523 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1524 if let Ok(variant_array) = json_to_variant(&string_array) {
1525 builder.append_variant(variant_array.value(0));
1526 } else {
1527 builder.append_null();
1528 }
1529 }
1530
1531 {
1533 let json_str = r#"{"a": {"b": {"x": "bar"}}}"#;
1534 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1535 if let Ok(variant_array) = json_to_variant(&string_array) {
1536 builder.append_variant(variant_array.value(0));
1537 } else {
1538 builder.append_null();
1539 }
1540 }
1541
1542 {
1544 let json_str = r#"{"a": {"b": {"y": 200}}}"#;
1545 let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
1546 if let Ok(variant_array) = json_to_variant(&string_array) {
1547 builder.append_variant(variant_array.value(0));
1548 } else {
1549 builder.append_null();
1550 }
1551 }
1552
1553 ArrayRef::from(builder.build())
1554 }
1555
1556 fn create_depth_0_shredded_test_data_simple() -> ArrayRef {
1559 let (metadata, string_x_value) = {
1561 let mut builder = parquet_variant::VariantBuilder::new();
1562 let mut obj = builder.new_object();
1563 obj.insert("x", Variant::from("foo"));
1564 obj.finish();
1565 builder.finish()
1566 };
1567
1568 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
1570
1571 let empty_object_value = {
1575 let mut builder = parquet_variant::VariantBuilder::new();
1576 let obj = builder.new_object();
1577 obj.finish();
1578 let (_, value) = builder.finish();
1579 value
1580 };
1581
1582 let value_array = BinaryViewArray::from(vec![
1583 Some(empty_object_value.as_slice()), Some(string_x_value.as_slice()), ]);
1586
1587 let x_field_typed_value = Int32Array::from(vec![Some(42), None]);
1589
1590 let x_field_struct = StructArrayBuilder::new()
1592 .with_field("typed_value", Arc::new(x_field_typed_value), true)
1593 .build();
1594
1595 let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1596 .expect("should create ShreddedVariantFieldArray");
1597
1598 let typed_value_fields = Fields::from(vec![Field::new(
1600 "x",
1601 x_field_shredded.data_type().clone(),
1602 true,
1603 )]);
1604 let typed_value_struct = StructArray::try_new(
1605 typed_value_fields,
1606 vec![ArrayRef::from(x_field_shredded)],
1607 None,
1608 )
1609 .unwrap();
1610
1611 let struct_array = StructArrayBuilder::new()
1613 .with_field("metadata", Arc::new(metadata_array), false)
1614 .with_field("value", Arc::new(value_array), true)
1615 .with_field("typed_value", Arc::new(typed_value_struct), true)
1616 .build();
1617
1618 Arc::new(struct_array)
1619 }
1620
1621 fn create_depth_1_shredded_test_data_working() -> ArrayRef {
1626 let (metadata, _) = {
1628 let mut builder = parquet_variant::VariantBuilder::new();
1630 let mut obj = builder.new_object();
1631
1632 let mut a_obj = obj.new_object("a");
1634 a_obj.insert("x", Variant::Int32(55));
1635 a_obj.finish();
1636
1637 obj.insert("b", Variant::Int32(42));
1638 obj.finish();
1639 builder.finish()
1640 };
1641
1642 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
1643
1644 let empty_object_value = {
1647 let mut builder = parquet_variant::VariantBuilder::new();
1648 let obj = builder.new_object();
1649 obj.finish();
1650 let (_, value) = builder.finish();
1651 value
1652 };
1653
1654 let row1_fallback = {
1657 let mut builder = parquet_variant::VariantBuilder::new();
1658 let mut obj = builder.new_object();
1659 obj.insert("fallback", Variant::from("data"));
1660 obj.finish();
1661 let (_, value) = builder.finish();
1662 value
1663 };
1664
1665 let value_array = BinaryViewArray::from(vec![
1666 Some(empty_object_value.as_slice()), Some(row1_fallback.as_slice()), ]);
1669
1670 let x_typed_value = Int32Array::from(vec![Some(55), None]);
1673 let x_field_struct = StructArrayBuilder::new()
1674 .with_field("typed_value", Arc::new(x_typed_value), true)
1675 .build();
1676 let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1677 .expect("should create ShreddedVariantFieldArray for x");
1678
1679 let a_value_data = {
1684 let mut builder = parquet_variant::VariantBuilder::new();
1685 let obj = builder.new_object();
1686 obj.finish();
1687 let (_, value) = builder.finish();
1688 value
1689 };
1690 let a_value_array = BinaryViewArray::from(vec![
1691 None, Some(a_value_data.as_slice()), ]);
1694
1695 let a_inner_fields = Fields::from(vec![Field::new(
1696 "x",
1697 x_field_shredded.data_type().clone(),
1698 true,
1699 )]);
1700 let a_inner_struct = StructArrayBuilder::new()
1701 .with_field(
1702 "typed_value",
1703 Arc::new(
1704 StructArray::try_new(
1705 a_inner_fields,
1706 vec![ArrayRef::from(x_field_shredded)],
1707 None,
1708 )
1709 .unwrap(),
1710 ),
1711 true,
1712 )
1713 .with_field("value", Arc::new(a_value_array), true)
1714 .build();
1715 let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
1716 .expect("should create ShreddedVariantFieldArray for a");
1717
1718 let typed_value_fields = Fields::from(vec![Field::new(
1720 "a",
1721 a_field_shredded.data_type().clone(),
1722 true,
1723 )]);
1724 let typed_value_struct = StructArray::try_new(
1725 typed_value_fields,
1726 vec![ArrayRef::from(a_field_shredded)],
1727 None,
1728 )
1729 .unwrap();
1730
1731 let struct_array = StructArrayBuilder::new()
1733 .with_field("metadata", Arc::new(metadata_array), false)
1734 .with_field("value", Arc::new(value_array), true)
1735 .with_field("typed_value", Arc::new(typed_value_struct), true)
1736 .build();
1737
1738 Arc::new(struct_array)
1739 }
1740
1741 fn create_depth_2_shredded_test_data_working() -> ArrayRef {
1747 let (metadata, _) = {
1749 let mut builder = parquet_variant::VariantBuilder::new();
1751 let mut obj = builder.new_object();
1752
1753 let mut a_obj = obj.new_object("a");
1755 let mut b_obj = a_obj.new_object("b");
1756 b_obj.insert("x", Variant::Int32(100));
1757 b_obj.finish();
1758 a_obj.finish();
1759
1760 obj.finish();
1761 builder.finish()
1762 };
1763
1764 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
1765
1766 let empty_object_value = {
1768 let mut builder = parquet_variant::VariantBuilder::new();
1769 let obj = builder.new_object();
1770 obj.finish();
1771 let (_, value) = builder.finish();
1772 value
1773 };
1774
1775 let value_array = BinaryViewArray::from(vec![
1777 Some(empty_object_value.as_slice()), Some(empty_object_value.as_slice()), Some(empty_object_value.as_slice()), ]);
1781
1782 let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
1786 let x_field_struct = StructArrayBuilder::new()
1787 .with_field("typed_value", Arc::new(x_typed_value), true)
1788 .build();
1789 let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
1790 .expect("should create ShreddedVariantFieldArray for x");
1791
1792 let b_value_data = {
1794 let mut builder = parquet_variant::VariantBuilder::new();
1795 let obj = builder.new_object();
1796 obj.finish();
1797 let (_, value) = builder.finish();
1798 value
1799 };
1800 let b_value_array = BinaryViewArray::from(vec![
1801 None, Some(b_value_data.as_slice()), Some(b_value_data.as_slice()), ]);
1805
1806 let b_inner_fields = Fields::from(vec![Field::new(
1807 "x",
1808 x_field_shredded.data_type().clone(),
1809 true,
1810 )]);
1811 let b_inner_struct = StructArrayBuilder::new()
1812 .with_field(
1813 "typed_value",
1814 Arc::new(
1815 StructArray::try_new(
1816 b_inner_fields,
1817 vec![ArrayRef::from(x_field_shredded)],
1818 None,
1819 )
1820 .unwrap(),
1821 ),
1822 true,
1823 )
1824 .with_field("value", Arc::new(b_value_array), true)
1825 .build();
1826 let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct)
1827 .expect("should create ShreddedVariantFieldArray for b");
1828
1829 let a_value_data = {
1831 let mut builder = parquet_variant::VariantBuilder::new();
1832 let obj = builder.new_object();
1833 obj.finish();
1834 let (_, value) = builder.finish();
1835 value
1836 };
1837 let a_value_array = BinaryViewArray::from(vec![
1838 None, Some(a_value_data.as_slice()), Some(a_value_data.as_slice()), ]);
1842
1843 let a_inner_fields = Fields::from(vec![Field::new(
1844 "b",
1845 b_field_shredded.data_type().clone(),
1846 true,
1847 )]);
1848 let a_inner_struct = StructArrayBuilder::new()
1849 .with_field(
1850 "typed_value",
1851 Arc::new(
1852 StructArray::try_new(
1853 a_inner_fields,
1854 vec![ArrayRef::from(b_field_shredded)],
1855 None,
1856 )
1857 .unwrap(),
1858 ),
1859 true,
1860 )
1861 .with_field("value", Arc::new(a_value_array), true)
1862 .build();
1863 let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
1864 .expect("should create ShreddedVariantFieldArray for a");
1865
1866 let typed_value_fields = Fields::from(vec![Field::new(
1868 "a",
1869 a_field_shredded.data_type().clone(),
1870 true,
1871 )]);
1872 let typed_value_struct = StructArray::try_new(
1873 typed_value_fields,
1874 vec![ArrayRef::from(a_field_shredded)],
1875 None,
1876 )
1877 .unwrap();
1878
1879 let struct_array = StructArrayBuilder::new()
1881 .with_field("metadata", Arc::new(metadata_array), false)
1882 .with_field("value", Arc::new(value_array), true)
1883 .with_field("typed_value", Arc::new(typed_value_struct), true)
1884 .build();
1885
1886 Arc::new(struct_array)
1887 }
1888
/// Requests the field `nonexistent_field` from the perfectly shredded Int32
/// fixture under both cast modes.
///
/// * Default cast options: the call succeeds and all three rows are null.
/// * `safe: false`: the call fails with `ArrowError::CastError` whose message
///   mentions accessing a field on a non-struct type.
#[test]
fn test_strict_cast_options_downcast_failure() {
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field};
    use arrow::error::ArrowError;
    use parquet_variant::VariantPath;
    use std::sync::Arc;

    let variant_array = perfectly_shredded_int32_variant_array();
    let variant_array_ref: Arc<dyn Array> = variant_array.clone();

    // Default (safe) mode: the unreachable field becomes nulls.
    let lenient = variant_get(
        &variant_array_ref,
        GetOptions {
            path: VariantPath::from("nonexistent_field"),
            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
            cast_options: CastOptions::default(),
        },
    );
    assert!(lenient.is_ok());
    let all_null = lenient.unwrap();
    assert_eq!(all_null.len(), 3);
    for row in 0..3 {
        assert!(all_null.is_null(row));
    }

    // Strict mode: the same request is an error.
    let strict = variant_get(
        &variant_array_ref,
        GetOptions {
            path: VariantPath::from("nonexistent_field"),
            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
            cast_options: CastOptions {
                safe: false,
                ..Default::default()
            },
        },
    );
    assert!(strict.is_err());
    let error = strict.unwrap_err();
    assert!(matches!(error, ArrowError::CastError(_)));
    let message = error.to_string();
    assert!(message.contains("Cannot access field 'nonexistent_field' on non-struct type"));
}
1938
/// Extracts `a.x` as `Int32` from the depth-1 shredded fixture and checks
/// that the output has one entry per input row, that row 0 is the valid
/// value `55`, and that row 1 is null.
#[test]
fn test_null_buffer_union_for_shredded_paths() {
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field};
    use parquet_variant::VariantPath;
    use std::sync::Arc;

    let variant_array = create_depth_1_shredded_test_data_working();
    let variant_array_ref: Arc<dyn Array> = Arc::clone(&variant_array);

    let as_int32 = Arc::new(Field::new("result", DataType::Int32, true));
    let result = variant_get(
        &variant_array_ref,
        GetOptions {
            path: VariantPath::from("a.x"),
            as_type: Some(as_int32),
            cast_options: CastOptions::default(),
        },
    )
    .unwrap();

    assert_eq!(result.len(), variant_array.len());

    assert!(!result.is_null(0), "Row 0 should have valid Int32 data");
    assert!(
        result.is_null(1),
        "Row 1 should be null due to type casting failure"
    );

    let typed = result.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(typed.value(0), 55);
}
1985
/// Extracts the whole variant as `Struct { a: Int32, b: Int32 }` from three
/// JSON objects in which `a` and/or `b` sometimes hold strings.
///
/// Expects string-valued fields to come back as null children while the
/// struct rows themselves all stay non-null.
#[test]
fn test_struct_null_mask_union_from_children() {
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields};
    use parquet_variant::VariantPath;
    use std::sync::Arc;

    use arrow::array::StringArray;

    let json_strings = vec![
        r#"{"a": 42, "b": "hello"}"#,
        r#"{"a": "world", "b": 100}"#,
        r#"{"a": 55, "b": 77}"#,
    ];
    let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
    let variant_array = json_to_variant(&string_array).unwrap();

    let struct_type = DataType::Struct(Fields::from(vec![
        Field::new("a", DataType::Int32, true),
        Field::new("b", DataType::Int32, true),
    ]));
    let options = GetOptions {
        path: VariantPath::default(),
        as_type: Some(Arc::new(Field::new("result", struct_type, true))),
        cast_options: CastOptions::default(),
    };

    let variant_array_ref = ArrayRef::from(variant_array);
    let result = variant_get(&variant_array_ref, options).unwrap();

    let struct_result = result.as_struct();
    assert_eq!(struct_result.len(), 3);

    let field_a = struct_result
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    let field_b = struct_result
        .column(1)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    // Expected (a, b) per row; `None` means the child must be null.
    let expected_rows: [(Option<i32>, Option<i32>); 3] =
        [(Some(42), None), (None, Some(100)), (Some(55), Some(77))];
    for (row, &(want_a, want_b)) in expected_rows.iter().enumerate() {
        for (column, want) in [(field_a, want_a), (field_b, want_b)] {
            match want {
                Some(value) => {
                    assert!(!column.is_null(row));
                    assert_eq!(column.value(row), value);
                }
                None => assert!(column.is_null(row)),
            }
        }
    }

    for row in 0..3 {
        assert!(!struct_result.is_null(row));
    }
}
2065
/// Extracts `x` as `Int32` from nine JSON objects, once with a nullable and
/// once with a non-nullable target field, and expects the same output both
/// times: values for rows 0, 5, 6 and 7 and nulls everywhere else (string
/// values, JSON null, missing field, and a value too large for `Int32`).
#[test]
fn test_field_nullability_preservation() {
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field};
    use parquet_variant::VariantPath;
    use std::sync::Arc;

    use arrow::array::StringArray;

    let json_strings = vec![
        r#"{"x": 42}"#,
        r#"{"x": "not_a_number"}"#,
        r#"{"x": null}"#,
        r#"{"x": "hello"}"#,
        r#"{"y": 100}"#,
        r#"{"x": 127}"#,
        r#"{"x": 32767}"#,
        r#"{"x": 2147483647}"#,
        r#"{"x": 9223372036854775807}"#,
    ];
    let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));

    // Expected output per row; `None` means the row must be null.
    let expected: [Option<i32>; 9] = [
        Some(42),
        None,
        None,
        None,
        None,
        Some(127),
        Some(32767),
        Some(2147483647),
        None,
    ];

    // First pass uses a nullable target field, second pass a non-nullable one.
    for nullable in [true, false] {
        let variant_array = json_to_variant(&string_array).unwrap();
        let variant_array_ref = ArrayRef::from(variant_array);

        let options = GetOptions {
            path: VariantPath::from("x"),
            as_type: Some(Arc::new(Field::new("result", DataType::Int32, nullable))),
            cast_options: CastOptions::default(),
        };
        let result = variant_get(&variant_array_ref, options).unwrap();
        let int32_result = result.as_any().downcast_ref::<Int32Array>().unwrap();

        assert_eq!(int32_result.len(), 9);
        for (row, want) in expected.iter().enumerate() {
            match *want {
                Some(value) => {
                    assert!(!int32_result.is_null(row));
                    assert_eq!(int32_result.value(row), value);
                }
                None => assert!(int32_result.is_null(row)),
            }
        }
    }
}
2187
/// Extracts `Struct { a, b, d }` (all `Int32`) from the comprehensive
/// shredded fixture, whose shredded fields are `a`, `b` and `c`.
///
/// Expects five rows with three columns, row 1 to be a null struct, `d` to be
/// null in every non-null row, and `a`/`b` to mirror the fixture's typed
/// values.
#[test]
fn test_struct_extraction_subset_superset_schema_perfectly_shredded() {
    let variant_array = create_comprehensive_shredded_variant();

    let struct_type = DataType::Struct(Fields::from(vec![
        Field::new("a", DataType::Int32, true),
        Field::new("b", DataType::Int32, true),
        Field::new("d", DataType::Int32, true),
    ]));
    let options = GetOptions {
        path: VariantPath::default(),
        as_type: Some(Arc::new(Field::new("result", struct_type, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array, options).unwrap();

    let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(struct_result.len(), 5);
    assert_eq!(struct_result.num_columns(), 3);

    let field_a = struct_result
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    let field_b = struct_result
        .column(1)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    let field_d = struct_result
        .column(2)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    // `Some(v)` checks the stored value, `None` checks that the slot is null.
    let check = |column: &Int32Array, row: usize, want: Option<i32>| match want {
        Some(value) => assert_eq!(column.value(row), value),
        None => assert!(column.is_null(row)),
    };

    // (row, expected a, expected b) for every non-null struct row.
    let valid_rows = [
        (0, Some(1), Some(2)),
        (2, None, Some(2)),
        (3, Some(1), None),
        (4, None, None),
    ];
    for (row, want_a, want_b) in valid_rows {
        assert!(!struct_result.is_null(row));
        check(field_a, row, want_a);
        check(field_b, row, want_b);
        check(field_d, row, None);
    }

    assert!(struct_result.is_null(1));
}
2257
/// Extracts `Struct { outer: Struct { inner: Int32 } }` from the
/// comprehensive nested shredded fixture and checks the null state at each
/// level for all four rows.
#[test]
fn test_nested_struct_extraction_perfectly_shredded() {
    let variant_array = create_comprehensive_nested_shredded_variant();
    println!("variant_array: {variant_array:?}");

    let inner_type = DataType::Struct(Fields::from(vec![Field::new(
        "inner",
        DataType::Int32,
        true,
    )]));
    let result_type = DataType::Struct(Fields::from(vec![Field::new(
        "outer", inner_type, true,
    )]));

    let options = GetOptions {
        path: VariantPath::default(),
        as_type: Some(Arc::new(Field::new("result", result_type, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array, options).unwrap();
    println!("result: {result:?}");

    let outer_struct = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(outer_struct.len(), 4);
    assert_eq!(outer_struct.num_columns(), 1);

    let inner_struct = outer_struct
        .column(0)
        .as_any()
        .downcast_ref::<StructArray>()
        .unwrap();
    assert_eq!(inner_struct.num_columns(), 1);

    let leaf_field = inner_struct
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    // Row 0: every level present, leaf holds 42.
    assert!(!outer_struct.is_null(0));
    assert!(!inner_struct.is_null(0));
    assert_eq!(leaf_field.value(0), 42);

    // Row 1: both structs present, leaf null.
    assert!(!outer_struct.is_null(1));
    assert!(!inner_struct.is_null(1));
    assert!(leaf_field.is_null(1));

    // Row 2: outer present, inner struct null.
    assert!(!outer_struct.is_null(2));
    assert!(inner_struct.is_null(2));

    // Row 3: outer struct null.
    assert!(outer_struct.is_null(3));
}
2316
/// Follows the one-step path `outer` in the comprehensive nested shredded
/// fixture and extracts `Struct { inner: Int32 }`.
///
/// Expects rows 0 and 1 to be non-null structs (with `inner` equal to 42 and
/// null respectively) and rows 2 and 3 to be null structs.
#[test]
fn test_path_based_null_masks_one_step() {
    let variant_array = create_comprehensive_nested_shredded_variant();

    let result_type = DataType::Struct(Fields::from(vec![Field::new(
        "inner",
        DataType::Int32,
        true,
    )]));
    let options = GetOptions {
        path: VariantPath::from("outer"),
        as_type: Some(Arc::new(Field::new("result", result_type, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array, options).unwrap();

    let outer_result = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(outer_result.len(), 4);
    assert_eq!(outer_result.num_columns(), 1);

    let inner_values = outer_result
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    assert!(!outer_result.is_null(0));
    assert_eq!(inner_values.value(0), 42);

    assert!(!outer_result.is_null(1));
    assert!(inner_values.is_null(1));

    for row in [2, 3] {
        assert!(outer_result.is_null(row));
    }
}
2361
/// Follows the two-step path `outer` -> `inner` in the comprehensive nested
/// shredded fixture and extracts `Int32`.
///
/// Expects four rows: 42 in row 0 and null in rows 1 through 3.
#[test]
fn test_path_based_null_masks_two_steps() {
    let variant_array = create_comprehensive_nested_shredded_variant();

    let options = GetOptions {
        path: VariantPath::from("outer").join("inner"),
        as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array, options).unwrap();

    let int_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(int_result.len(), 4);

    assert!(!int_result.is_null(0));
    assert_eq!(int_result.value(0), 42);

    for row in 1..4 {
        assert!(int_result.is_null(row));
    }
}
2395
/// Extracts `Struct { x: Int32, y: Int32 }` from the mixed/unshredded
/// fixture.
///
/// Expects four rows with two columns: `x` is 1, 2, 3 in rows 0 through 2,
/// `y` is 42 in row 0 and null in rows 1 and 2, and row 3 is a null struct.
#[test]
fn test_struct_extraction_mixed_and_unshredded() {
    let variant_array = create_mixed_and_unshredded_variant();

    let struct_type = DataType::Struct(Fields::from(vec![
        Field::new("x", DataType::Int32, true),
        Field::new("y", DataType::Int32, true),
    ]));
    let options = GetOptions {
        path: VariantPath::default(),
        as_type: Some(Arc::new(Field::new("result", struct_type, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array, options).unwrap();

    let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(struct_result.len(), 4);
    assert_eq!(struct_result.num_columns(), 2);

    let field_x = struct_result
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    let field_y = struct_result
        .column(1)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    // (row, expected x, expected y); `None` means `y` must be null.
    let valid_rows: [(usize, i32, Option<i32>); 3] = [(0, 1, Some(42)), (1, 2, None), (2, 3, None)];
    for (row, want_x, want_y) in valid_rows {
        assert_eq!(field_x.value(row), want_x);
        match want_y {
            Some(value) => assert_eq!(field_y.value(row), value),
            None => assert!(field_y.is_null(row)),
        }
    }

    assert!(struct_result.is_null(3));
}
2447
/// Requests `Struct { outer: Struct { inner: Int32 } }` from an unshredded
/// variant array built from JSON and asserts that `variant_get` returns an
/// error whose message contains "Not yet implemented".
#[test]
fn test_struct_row_builder_gap_demonstration() {
    let json_strings = vec![
        r#"{"outer": {"inner": 42}}"#,
        r#"{"outer": {"inner": 100}}"#,
    ];
    let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
    let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap());

    // Target type, built from the inside out.
    let inner_struct_type = DataType::Struct(Fields::from(vec![Field::new(
        "inner",
        DataType::Int32,
        true,
    )]));
    let outer_struct_type = DataType::Struct(Fields::from(vec![Field::new(
        "outer",
        inner_struct_type,
        true,
    )]));

    let options = GetOptions {
        path: VariantPath::default(),
        as_type: Some(Arc::new(Field::new("result", outer_struct_type, true))),
        cast_options: CastOptions::default(),
    };

    let result = variant_get(&variant_array_ref, options);

    assert!(result.is_err());
    let message = result.unwrap_err().to_string();
    assert!(message.contains("Not yet implemented"));
}
2480
2481 fn create_comprehensive_shredded_variant() -> ArrayRef {
2484 let (metadata, _) = {
2485 let mut builder = parquet_variant::VariantBuilder::new();
2486 let obj = builder.new_object();
2487 obj.finish();
2488 builder.finish()
2489 };
2490
2491 let nulls = NullBuffer::from(vec![
2493 true, false, true, true, true, ]);
2499
2500 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 5));
2501
2502 let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
2505 let a_field_struct = StructArrayBuilder::new()
2506 .with_field("typed_value", Arc::new(a_field_typed_value), true)
2507 .build();
2508 let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct)
2509 .expect("should create ShreddedVariantFieldArray for a");
2510
2511 let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
2513 let b_field_struct = StructArrayBuilder::new()
2514 .with_field("typed_value", Arc::new(b_field_typed_value), true)
2515 .build();
2516 let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct)
2517 .expect("should create ShreddedVariantFieldArray for b");
2518
2519 let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
2521 let c_field_struct = StructArrayBuilder::new()
2522 .with_field("typed_value", Arc::new(c_field_typed_value), true)
2523 .build();
2524 let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct)
2525 .expect("should create ShreddedVariantFieldArray for c");
2526
2527 let typed_value_fields = Fields::from(vec![
2529 Field::new("a", a_field_shredded.data_type().clone(), true),
2530 Field::new("b", b_field_shredded.data_type().clone(), true),
2531 Field::new("c", c_field_shredded.data_type().clone(), true),
2532 ]);
2533 let typed_value_struct = StructArray::try_new(
2534 typed_value_fields,
2535 vec![
2536 ArrayRef::from(a_field_shredded),
2537 ArrayRef::from(b_field_shredded),
2538 ArrayRef::from(c_field_shredded),
2539 ],
2540 None,
2541 )
2542 .unwrap();
2543
2544 let struct_array = StructArrayBuilder::new()
2546 .with_field("metadata", Arc::new(metadata_array), false)
2547 .with_field("typed_value", Arc::new(typed_value_struct), true)
2548 .with_nulls(nulls)
2549 .build();
2550
2551 Arc::new(struct_array)
2552 }
2553
2554 fn create_comprehensive_nested_shredded_variant() -> ArrayRef {
2559 let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); let inner = StructArrayBuilder::new()
2563 .with_field("typed_value", Arc::new(inner_typed_value), true)
2564 .build();
2565 let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap();
2566
2567 let outer_typed_value_nulls = NullBuffer::from(vec![
2568 true, false, false, false, ]);
2573 let outer_typed_value = StructArrayBuilder::new()
2574 .with_field("inner", ArrayRef::from(inner), false)
2575 .with_nulls(outer_typed_value_nulls)
2576 .build();
2577
2578 let outer = StructArrayBuilder::new()
2579 .with_field("typed_value", Arc::new(outer_typed_value), true)
2580 .build();
2581 let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap();
2582
2583 let typed_value_nulls = NullBuffer::from(vec![
2584 true, true, false, false, ]);
2589 let typed_value = StructArrayBuilder::new()
2590 .with_field("outer", ArrayRef::from(outer), false)
2591 .with_nulls(typed_value_nulls)
2592 .build();
2593
2594 let metadata_array =
2596 BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
2597 let nulls = NullBuffer::from(vec![
2598 true, true, true, false, ]);
2603 let struct_array = StructArrayBuilder::new()
2604 .with_field("metadata", Arc::new(metadata_array), false)
2605 .with_field("typed_value", Arc::new(typed_value), true)
2606 .with_nulls(nulls)
2607 .build();
2608
2609 Arc::new(struct_array)
2610 }
2611
2612 fn create_mixed_and_unshredded_variant() -> ArrayRef {
2615 let (metadata, y_field_value) = {
2620 let mut builder = parquet_variant::VariantBuilder::new();
2621 let mut obj = builder.new_object();
2622 obj.insert("y", Variant::from(42));
2623 obj.finish();
2624 builder.finish()
2625 };
2626
2627 let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
2628
2629 let empty_object_value = {
2636 let mut builder = parquet_variant::VariantBuilder::new();
2637 builder.new_object().finish();
2638 let (_, value) = builder.finish();
2639 value
2640 };
2641
2642 let y_null_value = {
2643 let mut builder = parquet_variant::VariantBuilder::new();
2644 builder.new_object().with_field("y", Variant::Null).finish();
2645 let (_, value) = builder.finish();
2646 value
2647 };
2648
2649 let value_array = BinaryViewArray::from(vec![
2650 Some(y_field_value.as_slice()), Some(empty_object_value.as_slice()), Some(y_null_value.as_slice()), Some(empty_object_value.as_slice()), ]);
2655
2656 let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]);
2659 let x_field_struct = StructArrayBuilder::new()
2660 .with_field("typed_value", Arc::new(x_field_typed_value), true)
2661 .build();
2662 let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
2663 .expect("should create ShreddedVariantFieldArray for x");
2664
2665 let typed_value_struct = StructArrayBuilder::new()
2667 .with_field("x", ArrayRef::from(x_field_shredded), false)
2668 .build();
2669
2670 let variant_nulls = NullBuffer::from(vec![true, true, true, false]); let struct_array = StructArrayBuilder::new()
2674 .with_field("metadata", Arc::new(metadata_array), false)
2675 .with_field("value", Arc::new(value_array), true)
2676 .with_field("typed_value", Arc::new(typed_value_struct), true)
2677 .with_nulls(variant_nulls)
2678 .build();
2679
2680 Arc::new(struct_array)
2681 }
2682}