1use arrow::{
18 array::{Array, ArrayRef},
19 compute::CastOptions,
20 error::Result,
21};
22use arrow_schema::{ArrowError, FieldRef};
23use parquet_variant::VariantPath;
24
25use crate::variant_array::ShreddingState;
26use crate::variant_get::output::instantiate_output_builder;
27use crate::VariantArray;
28
29mod output;
30
31pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
38 let variant_array: &VariantArray = input.as_any().downcast_ref().ok_or_else(|| {
39 ArrowError::InvalidArgumentError(
40 "expected a VariantArray as the input for variant_get".to_owned(),
41 )
42 })?;
43
44 let output_builder = instantiate_output_builder(options.clone())?;
46
47 match variant_array.shredding_state() {
49 ShreddingState::PartiallyShredded {
50 metadata,
51 value,
52 typed_value,
53 } => output_builder.partially_shredded(variant_array, metadata, value, typed_value),
54 ShreddingState::Typed {
55 metadata,
56 typed_value,
57 } => output_builder.typed(variant_array, metadata, typed_value),
58 ShreddingState::Unshredded { metadata, value } => {
59 output_builder.unshredded(variant_array, metadata, value)
60 }
61 ShreddingState::AllNull { metadata } => output_builder.all_null(variant_array, metadata),
62 }
63}
64
65#[derive(Debug, Clone, Default)]
67pub struct GetOptions<'a> {
68 pub path: VariantPath<'a>,
70 pub as_type: Option<FieldRef>,
74 pub cast_options: CastOptions<'a>,
76}
77
78impl<'a> GetOptions<'a> {
79 pub fn new() -> Self {
81 Default::default()
82 }
83
84 pub fn new_with_path(path: VariantPath<'a>) -> Self {
86 Self {
87 path,
88 as_type: None,
89 cast_options: Default::default(),
90 }
91 }
92
93 pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
95 self.as_type = as_type;
96 self
97 }
98
99 pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
101 self.cast_options = cast_options;
102 self
103 }
104}
105
106#[cfg(test)]
107mod test {
108 use std::sync::Arc;
109
110 use arrow::array::{
111 Array, ArrayRef, BinaryViewArray, Float16Array, Float32Array, Float64Array, Int16Array,
112 Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array,
113 UInt64Array, UInt8Array,
114 };
115 use arrow::buffer::NullBuffer;
116 use arrow::compute::CastOptions;
117 use arrow_schema::{DataType, Field, FieldRef, Fields};
118 use parquet_variant::{Variant, VariantPath};
119
120 use crate::json_to_variant;
121 use crate::VariantArray;
122
123 use super::{variant_get, GetOptions};
124
125 fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
126 let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
128 let input_variant_array_ref: ArrayRef =
129 Arc::new(json_to_variant(&input_array_ref).unwrap());
130
131 let result =
132 variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
133
134 let expected_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
136 let expected_variant_array = json_to_variant(&expected_array_ref).unwrap();
137
138 let result_array: &VariantArray = result.as_any().downcast_ref().unwrap();
139 assert_eq!(
140 result_array.len(),
141 1,
142 "Expected result array to have length 1"
143 );
144 assert!(
145 result_array.nulls().is_none(),
146 "Expected no nulls in result array"
147 );
148 let result_variant = result_array.value(0);
149 let expected_variant = expected_variant_array.value(0);
150 assert_eq!(
151 result_variant, expected_variant,
152 "Result variant does not match expected variant"
153 );
154 }
155
156 #[test]
157 fn get_primitive_variant_field() {
158 single_variant_get_test(
159 r#"{"some_field": 1234}"#,
160 VariantPath::from("some_field"),
161 "1234",
162 );
163 }
164
165 #[test]
166 fn get_primitive_variant_list_index() {
167 single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
168 }
169
170 #[test]
171 fn get_primitive_variant_inside_object_of_object() {
172 single_variant_get_test(
173 r#"{"top_level_field": {"inner_field": 1234}}"#,
174 VariantPath::from("top_level_field").join("inner_field"),
175 "1234",
176 );
177 }
178
179 #[test]
180 fn get_primitive_variant_inside_list_of_object() {
181 single_variant_get_test(
182 r#"[{"some_field": 1234}]"#,
183 VariantPath::from(0).join("some_field"),
184 "1234",
185 );
186 }
187
188 #[test]
189 fn get_primitive_variant_inside_object_of_list() {
190 single_variant_get_test(
191 r#"{"some_field": [1234]}"#,
192 VariantPath::from("some_field").join(0),
193 "1234",
194 );
195 }
196
197 #[test]
198 fn get_complex_variant() {
199 single_variant_get_test(
200 r#"{"top_level_field": {"inner_field": 1234}}"#,
201 VariantPath::from("top_level_field"),
202 r#"{"inner_field": 1234}"#,
203 );
204 }
205
206 macro_rules! numeric_partially_shredded_test {
208 ($primitive_type:ty, $data_fn:ident) => {
209 let array = $data_fn();
210 let options = GetOptions::new();
211 let result = variant_get(&array, options).unwrap();
212
213 let result: &VariantArray = result.as_any().downcast_ref().unwrap();
215 assert_eq!(result.len(), 4);
216
217 assert_eq!(
219 result.value(0),
220 Variant::from(<$primitive_type>::try_from(34u8).unwrap())
221 );
222 assert!(!result.is_valid(1));
223 assert_eq!(result.value(2), Variant::from("n/a"));
224 assert_eq!(
225 result.value(3),
226 Variant::from(<$primitive_type>::try_from(100u8).unwrap())
227 );
228 };
229 }
230
    /// Runs the shared partially-shredded check against the `i8` fixture.
    #[test]
    fn get_variant_partially_shredded_int8_as_variant() {
        numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
    }
235
    /// Runs the shared partially-shredded check against the `i16` fixture.
    #[test]
    fn get_variant_partially_shredded_int16_as_variant() {
        numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
    }
240
    /// Runs the shared partially-shredded check against the `i32` fixture.
    #[test]
    fn get_variant_partially_shredded_int32_as_variant() {
        numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
    }
245
    /// Runs the shared partially-shredded check against the `i64` fixture.
    #[test]
    fn get_variant_partially_shredded_int64_as_variant() {
        numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
    }
250
    /// Runs the shared partially-shredded check against the `u8` fixture.
    #[test]
    fn get_variant_partially_shredded_uint8_as_variant() {
        numeric_partially_shredded_test!(u8, partially_shredded_uint8_variant_array);
    }
255
    /// Runs the shared partially-shredded check against the `u16` fixture.
    #[test]
    fn get_variant_partially_shredded_uint16_as_variant() {
        numeric_partially_shredded_test!(u16, partially_shredded_uint16_variant_array);
    }
260
    /// Runs the shared partially-shredded check against the `u32` fixture.
    #[test]
    fn get_variant_partially_shredded_uint32_as_variant() {
        numeric_partially_shredded_test!(u32, partially_shredded_uint32_variant_array);
    }
265
    /// Runs the shared partially-shredded check against the `u64` fixture.
    #[test]
    fn get_variant_partially_shredded_uint64_as_variant() {
        numeric_partially_shredded_test!(u64, partially_shredded_uint64_variant_array);
    }
270
    /// Runs the shared partially-shredded check against the `half::f16` fixture.
    #[test]
    fn get_variant_partially_shredded_float16_as_variant() {
        numeric_partially_shredded_test!(half::f16, partially_shredded_float16_variant_array);
    }
275
    /// Runs the shared partially-shredded check against the `f32` fixture.
    #[test]
    fn get_variant_partially_shredded_float32_as_variant() {
        numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
    }
280
    /// Runs the shared partially-shredded check against the `f64` fixture.
    #[test]
    fn get_variant_partially_shredded_float64_as_variant() {
        numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
    }
285
286 #[test]
288 fn get_variant_shredded_int32_as_int32_safe_cast() {
289 let array = partially_shredded_int32_variant_array();
291 let field = Field::new("typed_value", DataType::Int32, true);
293 let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
294 let result = variant_get(&array, options).unwrap();
295 let expected: ArrayRef = Arc::new(Int32Array::from(vec![
296 Some(34),
297 None,
298 None, Some(100),
300 ]));
301 assert_eq!(&result, &expected)
302 }
303
304 #[test]
307 fn get_variant_shredded_int32_as_int32_unsafe_cast() {
308 let array = partially_shredded_int32_variant_array();
310 let field = Field::new("typed_value", DataType::Int32, true);
311 let cast_options = CastOptions {
312 safe: false, ..Default::default()
314 };
315 let options = GetOptions::new()
316 .with_as_type(Some(FieldRef::from(field)))
317 .with_cast_options(cast_options);
318
319 let err = variant_get(&array, options).unwrap_err();
320 assert_eq!(err.to_string(), "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])");
322 }
323
324 macro_rules! numeric_perfectly_shredded_test {
326 ($primitive_type:ty, $data_fn:ident) => {
327 let array = $data_fn();
328 let options = GetOptions::new();
329 let result = variant_get(&array, options).unwrap();
330
331 let result: &VariantArray = result.as_any().downcast_ref().unwrap();
333 assert_eq!(result.len(), 3);
334
335 assert_eq!(
337 result.value(0),
338 Variant::from(<$primitive_type>::try_from(1u8).unwrap())
339 );
340 assert_eq!(
341 result.value(1),
342 Variant::from(<$primitive_type>::try_from(2u8).unwrap())
343 );
344 assert_eq!(
345 result.value(2),
346 Variant::from(<$primitive_type>::try_from(3u8).unwrap())
347 );
348 };
349 }
350
    /// Runs the shared perfectly-shredded check against the `i8` fixture.
    #[test]
    fn get_variant_perfectly_shredded_int8_as_variant() {
        numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
    }
355
    /// Runs the shared perfectly-shredded check against the `i16` fixture.
    #[test]
    fn get_variant_perfectly_shredded_int16_as_variant() {
        numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
    }
360
    /// Runs the shared perfectly-shredded check against the `i32` fixture.
    #[test]
    fn get_variant_perfectly_shredded_int32_as_variant() {
        numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
    }
365
    /// Runs the shared perfectly-shredded check against the `i64` fixture.
    #[test]
    fn get_variant_perfectly_shredded_int64_as_variant() {
        numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
    }
370
    /// Runs the shared perfectly-shredded check against the `u8` fixture.
    #[test]
    fn get_variant_perfectly_shredded_uint8_as_variant() {
        numeric_perfectly_shredded_test!(u8, perfectly_shredded_uint8_variant_array);
    }
375
    /// Runs the shared perfectly-shredded check against the `u16` fixture.
    #[test]
    fn get_variant_perfectly_shredded_uint16_as_variant() {
        numeric_perfectly_shredded_test!(u16, perfectly_shredded_uint16_variant_array);
    }
380
    /// Runs the shared perfectly-shredded check against the `u32` fixture.
    #[test]
    fn get_variant_perfectly_shredded_uint32_as_variant() {
        numeric_perfectly_shredded_test!(u32, perfectly_shredded_uint32_variant_array);
    }
385
    /// Runs the shared perfectly-shredded check against the `u64` fixture.
    #[test]
    fn get_variant_perfectly_shredded_uint64_as_variant() {
        numeric_perfectly_shredded_test!(u64, perfectly_shredded_uint64_variant_array);
    }
390
    /// Runs the shared perfectly-shredded check against the `half::f16` fixture.
    #[test]
    fn get_variant_perfectly_shredded_float16_as_variant() {
        numeric_perfectly_shredded_test!(half::f16, perfectly_shredded_float16_variant_array);
    }
395
    /// Runs the shared perfectly-shredded check against the `f32` fixture.
    #[test]
    fn get_variant_perfectly_shredded_float32_as_variant() {
        numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
    }
400
    /// Runs the shared perfectly-shredded check against the `f64` fixture.
    #[test]
    fn get_variant_perfectly_shredded_float64_as_variant() {
        numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
    }
405
406 #[test]
408 fn get_variant_perfectly_shredded_int32_as_int32() {
409 let array = perfectly_shredded_int32_variant_array();
411 let field = Field::new("typed_value", DataType::Int32, true);
413 let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
414 let result = variant_get(&array, options).unwrap();
415 let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]));
416 assert_eq!(&result, &expected)
417 }
418
419 #[test]
421 fn get_variant_all_null_as_variant() {
422 let array = all_null_variant_array();
423 let options = GetOptions::new();
424 let result = variant_get(&array, options).unwrap();
425
426 let result: &VariantArray = result.as_any().downcast_ref().unwrap();
428 assert_eq!(result.len(), 3);
429
430 assert!(!result.is_valid(0));
432 assert!(!result.is_valid(1));
433 assert!(!result.is_valid(2));
434 }
435
436 #[test]
438 fn get_variant_all_null_as_int32() {
439 let array = all_null_variant_array();
440 let field = Field::new("typed_value", DataType::Int32, true);
442 let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
443 let result = variant_get(&array, options).unwrap();
444
445 let expected: ArrayRef = Arc::new(Int32Array::from(vec![
446 Option::<i32>::None,
447 Option::<i32>::None,
448 Option::<i32>::None,
449 ]));
450 assert_eq!(&result, &expected)
451 }
452
453 #[test]
454 fn get_variant_perfectly_shredded_int16_as_int16() {
455 let array = perfectly_shredded_int16_variant_array();
457 let field = Field::new("typed_value", DataType::Int16, true);
459 let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
460 let result = variant_get(&array, options).unwrap();
461 let expected: ArrayRef = Arc::new(Int16Array::from(vec![Some(1), Some(2), Some(3)]));
462 assert_eq!(&result, &expected)
463 }
464
465 macro_rules! numeric_perfectly_shredded_variant_array_fn {
477 ($func:ident, $array_type:ident, $primitive_type:ty) => {
478 fn $func() -> ArrayRef {
479 let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() };
482 let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
483 let typed_value = $array_type::from(vec![
484 Some(<$primitive_type>::try_from(1u8).unwrap()),
485 Some(<$primitive_type>::try_from(2u8).unwrap()),
486 Some(<$primitive_type>::try_from(3u8).unwrap()),
487 ]);
488
489 let struct_array = StructArrayBuilder::new()
490 .with_field("metadata", Arc::new(metadata))
491 .with_field("typed_value", Arc::new(typed_value))
492 .build();
493
494 Arc::new(
495 VariantArray::try_new(Arc::new(struct_array))
496 .expect("should create variant array"),
497 )
498 }
499 };
500 }
501
    // Fixture constructors generated by `numeric_perfectly_shredded_variant_array_fn!`:
    // one `perfectly_shredded_*_variant_array` function per numeric type, each
    // paired with the matching arrow array type and Rust primitive.
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_uint8_variant_array,
        UInt8Array,
        u8
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_uint16_variant_array,
        UInt16Array,
        u16
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_uint32_variant_array,
        UInt32Array,
        u32
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_uint64_variant_array,
        UInt64Array,
        u64
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float16_variant_array,
        Float16Array,
        half::f16
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    numeric_perfectly_shredded_variant_array_fn!(
        perfectly_shredded_float64_variant_array,
        Float64Array,
        f64
    );
557
558 macro_rules! numeric_partially_shredded_variant_array_fn {
582 ($func:ident, $array_type:ident, $primitive_type:ty) => {
583 fn $func() -> ArrayRef {
584 let (metadata, string_value) = {
587 let mut builder = parquet_variant::VariantBuilder::new();
588 builder.append_value("n/a");
589 builder.finish()
590 };
591
592 let nulls = NullBuffer::from(vec![
593 true, false, true, true, ]);
598
599 let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
601
602 let values = BinaryViewArray::from(vec![
605 None, Some(b"" as &[u8]), Some(&string_value), None, ]);
610
611 let typed_value = $array_type::from(vec![
612 Some(<$primitive_type>::try_from(34u8).unwrap()), None, None, Some(<$primitive_type>::try_from(100u8).unwrap()), ]);
617
618 let struct_array = StructArrayBuilder::new()
619 .with_field("metadata", Arc::new(metadata))
620 .with_field("typed_value", Arc::new(typed_value))
621 .with_field("value", Arc::new(values))
622 .with_nulls(nulls)
623 .build();
624
625 Arc::new(
626 VariantArray::try_new(Arc::new(struct_array))
627 .expect("should create variant array"),
628 )
629 }
630 };
631 }
632
    // Fixture constructors generated by `numeric_partially_shredded_variant_array_fn!`:
    // one `partially_shredded_*_variant_array` function per numeric type, each
    // paired with the matching arrow array type and Rust primitive.
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int8_variant_array,
        Int8Array,
        i8
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int16_variant_array,
        Int16Array,
        i16
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int32_variant_array,
        Int32Array,
        i32
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_int64_variant_array,
        Int64Array,
        i64
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_uint8_variant_array,
        UInt8Array,
        u8
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_uint16_variant_array,
        UInt16Array,
        u16
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_uint32_variant_array,
        UInt32Array,
        u32
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_uint64_variant_array,
        UInt64Array,
        u64
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float16_variant_array,
        Float16Array,
        half::f16
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float32_variant_array,
        Float32Array,
        f32
    );
    numeric_partially_shredded_variant_array_fn!(
        partially_shredded_float64_variant_array,
        Float64Array,
        f64
    );
688
689 #[derive(Debug, Default, Clone)]
693 struct StructArrayBuilder {
694 fields: Vec<FieldRef>,
695 arrays: Vec<ArrayRef>,
696 nulls: Option<NullBuffer>,
697 }
698
699 impl StructArrayBuilder {
700 fn new() -> Self {
701 Default::default()
702 }
703
704 fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self {
706 let field = Field::new(field_name, array.data_type().clone(), true);
707 self.fields.push(Arc::new(field));
708 self.arrays.push(array);
709 self
710 }
711
712 fn with_nulls(mut self, nulls: NullBuffer) -> Self {
714 self.nulls = Some(nulls);
715 self
716 }
717
718 pub fn build(self) -> StructArray {
719 let Self {
720 fields,
721 arrays,
722 nulls,
723 } = self;
724 StructArray::new(Fields::from(fields), arrays, nulls)
725 }
726 }
727
728 fn all_null_variant_array() -> ArrayRef {
745 let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() };
746
747 let nulls = NullBuffer::from(vec![
748 false, false, false, ]);
752
753 let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
755
756 let struct_array = StructArrayBuilder::new()
757 .with_field("metadata", Arc::new(metadata))
758 .with_nulls(nulls)
759 .build();
760
761 Arc::new(
762 VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"),
763 )
764 }
765}