1use crate::VariantArrayBuilder;
21use crate::type_conversion::{
22 generic_conversion_single_value, generic_conversion_single_value_with_result,
23 primitive_conversion_single_value,
24};
25use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
26use arrow::buffer::NullBuffer;
27use arrow::compute::cast;
28use arrow::datatypes::{
29 Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, Float16Type, Float32Type,
30 Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Time64MicrosecondType,
31 TimestampMicrosecondType, TimestampNanosecondType,
32};
33use arrow::error::Result;
34use arrow_schema::extension::ExtensionType;
35use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
36use chrono::{DateTime, NaiveTime};
37use parquet_variant::{
38 Uuid, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType as _,
39};
40
41use std::borrow::Cow;
42use std::sync::Arc;
43
44pub struct VariantType;
49
50impl ExtensionType for VariantType {
51 const NAME: &'static str = "arrow.parquet.variant";
52
53 type Metadata = &'static str;
56
57 fn metadata(&self) -> &Self::Metadata {
58 &""
59 }
60
61 fn serialize_metadata(&self) -> Option<String> {
62 Some(String::new())
63 }
64
65 fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata> {
66 Ok("")
67 }
68
69 fn supports_data_type(&self, data_type: &DataType) -> Result<()> {
70 if matches!(data_type, DataType::Struct(_)) {
71 Ok(())
72 } else {
73 Err(ArrowError::InvalidArgumentError(format!(
74 "VariantType only supports StructArray, got {data_type}"
75 )))
76 }
77 }
78
79 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self> {
80 Self.supports_data_type(data_type)?;
81 Ok(Self)
82 }
83}
84
85#[derive(Debug, Clone, PartialEq)]
217pub struct VariantArray {
218 inner: StructArray,
220
221 metadata: BinaryViewArray,
223
224 shredding_state: ShreddingState,
226}
227
228impl VariantArray {
229 pub fn try_new(inner: &dyn Array) -> Result<Self> {
257 let inner = cast_to_binary_view_arrays(inner)?;
260
261 let Some(inner) = inner.as_struct_opt() else {
262 return Err(ArrowError::InvalidArgumentError(
263 "Invalid VariantArray: requires StructArray as input".to_string(),
264 ));
265 };
266
267 let Some(metadata_field) = inner.column_by_name("metadata") else {
271 return Err(ArrowError::InvalidArgumentError(
272 "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
273 ));
274 };
275 let Some(metadata) = metadata_field.as_binary_view_opt() else {
276 return Err(ArrowError::NotYetImplemented(format!(
277 "VariantArray 'metadata' field must be BinaryView, got {}",
278 metadata_field.data_type()
279 )));
280 };
281
282 Ok(Self {
284 inner: inner.clone(),
285 metadata: metadata.clone(),
286 shredding_state: ShreddingState::try_from(inner)?,
287 })
288 }
289
290 pub(crate) fn from_parts(
291 metadata: BinaryViewArray,
292 value: Option<BinaryViewArray>,
293 typed_value: Option<ArrayRef>,
294 nulls: Option<NullBuffer>,
295 ) -> Self {
296 let mut builder =
297 StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false);
298 if let Some(value) = value.clone() {
299 builder = builder.with_field("value", Arc::new(value), true);
300 }
301 if let Some(typed_value) = typed_value.clone() {
302 builder = builder.with_field("typed_value", typed_value, true);
303 }
304 if let Some(nulls) = nulls {
305 builder = builder.with_nulls(nulls);
306 }
307
308 Self {
309 inner: builder.build(),
310 metadata,
311 shredding_state: ShreddingState::new(value, typed_value),
312 }
313 }
314
315 pub fn inner(&self) -> &StructArray {
317 &self.inner
318 }
319
320 pub fn into_inner(self) -> StructArray {
322 self.inner
323 }
324
325 pub fn shredding_state(&self) -> &ShreddingState {
327 &self.shredding_state
328 }
329
330 pub fn value(&self, index: usize) -> Variant<'_, '_> {
340 self.try_value(index).unwrap()
341 }
342
343 pub fn try_value(&self, index: usize) -> Result<Variant<'_, '_>> {
371 match (self.typed_value_field(), self.value_field()) {
372 (Some(typed_value), value) if typed_value.is_valid(index) => {
374 typed_value_to_variant(typed_value, value, index)
375 }
376 (_, Some(value)) if value.is_valid(index) => {
378 Ok(Variant::new(self.metadata.value(index), value.value(index)))
379 }
380 _ => Ok(Variant::Null),
383 }
384 }
385
386 pub fn metadata_field(&self) -> &BinaryViewArray {
388 &self.metadata
389 }
390
391 pub fn value_field(&self) -> Option<&BinaryViewArray> {
393 self.shredding_state.value_field()
394 }
395
396 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
398 self.shredding_state.typed_value_field()
399 }
400
401 pub fn field(&self, name: impl Into<String>) -> Field {
404 Field::new(
405 name.into(),
406 self.data_type().clone(),
407 self.inner.is_nullable(),
408 )
409 .with_extension_type(VariantType)
410 }
411
412 pub fn data_type(&self) -> &DataType {
414 self.inner.data_type()
415 }
416
417 pub fn slice(&self, offset: usize, length: usize) -> Self {
418 let inner = self.inner.slice(offset, length);
419 let metadata = self.metadata.slice(offset, length);
420 let shredding_state = self.shredding_state.slice(offset, length);
421 Self {
422 inner,
423 metadata,
424 shredding_state,
425 }
426 }
427
428 pub fn len(&self) -> usize {
429 self.inner.len()
430 }
431
432 pub fn is_empty(&self) -> bool {
433 self.inner.is_empty()
434 }
435
436 pub fn nulls(&self) -> Option<&NullBuffer> {
437 self.inner.nulls()
438 }
439
440 pub fn is_null(&self, index: usize) -> bool {
442 self.nulls().is_some_and(|n| n.is_null(index))
443 }
444
445 pub fn is_valid(&self, index: usize) -> bool {
447 !self.is_null(index)
448 }
449
450 pub fn iter(&self) -> VariantArrayIter<'_> {
452 VariantArrayIter::new(self)
453 }
454}
455
456impl From<VariantArray> for StructArray {
457 fn from(variant_array: VariantArray) -> Self {
458 variant_array.into_inner()
459 }
460}
461
462impl From<VariantArray> for ArrayRef {
463 fn from(variant_array: VariantArray) -> Self {
464 Arc::new(variant_array.into_inner())
465 }
466}
467
468impl<'m, 'v> FromIterator<Option<Variant<'m, 'v>>> for VariantArray {
469 fn from_iter<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(iter: T) -> Self {
470 let iter = iter.into_iter();
471
472 let mut b = VariantArrayBuilder::new(iter.size_hint().0);
473 b.extend(iter);
474 b.build()
475 }
476}
477
478impl<'m, 'v> FromIterator<Variant<'m, 'v>> for VariantArray {
479 fn from_iter<T: IntoIterator<Item = Variant<'m, 'v>>>(iter: T) -> Self {
480 Self::from_iter(iter.into_iter().map(Some))
481 }
482}
483
484#[derive(Debug)]
509pub struct VariantArrayIter<'a> {
510 array: &'a VariantArray,
511 head_i: usize,
512 tail_i: usize,
513}
514
515impl<'a> VariantArrayIter<'a> {
516 pub fn new(array: &'a VariantArray) -> Self {
518 Self {
519 array,
520 head_i: 0,
521 tail_i: array.len(),
522 }
523 }
524
525 fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
526 self.array.is_valid(i).then(|| self.array.value(i))
527 }
528}
529
530impl<'a> Iterator for VariantArrayIter<'a> {
531 type Item = Option<Variant<'a, 'a>>;
532
533 #[inline]
534 fn next(&mut self) -> Option<Self::Item> {
535 if self.head_i == self.tail_i {
536 return None;
537 }
538
539 let out = self.value_opt(self.head_i);
540
541 self.head_i += 1;
542
543 Some(out)
544 }
545
546 fn size_hint(&self) -> (usize, Option<usize>) {
547 let remainder = self.tail_i - self.head_i;
548
549 (remainder, Some(remainder))
550 }
551}
552
553impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
554 fn next_back(&mut self) -> Option<Self::Item> {
555 if self.head_i == self.tail_i {
556 return None;
557 }
558
559 self.tail_i -= 1;
560
561 Some(self.value_opt(self.tail_i))
562 }
563}
564
565impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
566
567#[derive(Debug)]
602pub struct ShreddedVariantFieldArray {
603 inner: StructArray,
605 shredding_state: ShreddingState,
606}
607
608#[allow(unused)]
609impl ShreddedVariantFieldArray {
610 pub fn try_new(inner: &dyn Array) -> Result<Self> {
631 let Some(inner_struct) = inner.as_struct_opt() else {
632 return Err(ArrowError::InvalidArgumentError(
633 "Invalid ShreddedVariantFieldArray: requires StructArray as input".to_string(),
634 ));
635 };
636
637 Ok(Self {
639 inner: inner_struct.clone(),
640 shredding_state: ShreddingState::try_from(inner_struct)?,
641 })
642 }
643
644 pub fn shredding_state(&self) -> &ShreddingState {
646 &self.shredding_state
647 }
648
649 pub fn value_field(&self) -> Option<&BinaryViewArray> {
651 self.shredding_state.value_field()
652 }
653
654 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
656 self.shredding_state.typed_value_field()
657 }
658
659 pub fn inner(&self) -> &StructArray {
661 &self.inner
662 }
663
664 pub(crate) fn from_parts(
665 value: Option<BinaryViewArray>,
666 typed_value: Option<ArrayRef>,
667 nulls: Option<NullBuffer>,
668 ) -> Self {
669 let mut builder = StructArrayBuilder::new();
670 if let Some(value) = value.clone() {
671 builder = builder.with_field("value", Arc::new(value), true);
672 }
673 if let Some(typed_value) = typed_value.clone() {
674 builder = builder.with_field("typed_value", typed_value, true);
675 }
676 if let Some(nulls) = nulls {
677 builder = builder.with_nulls(nulls);
678 }
679
680 Self {
681 inner: builder.build(),
682 shredding_state: ShreddingState::new(value, typed_value),
683 }
684 }
685
686 pub fn into_inner(self) -> StructArray {
688 self.inner
689 }
690
691 pub fn data_type(&self) -> &DataType {
692 self.inner.data_type()
693 }
694
695 pub fn len(&self) -> usize {
696 self.inner.len()
697 }
698
699 pub fn is_empty(&self) -> bool {
700 self.inner.is_empty()
701 }
702
703 pub fn offset(&self) -> usize {
704 self.inner.offset()
705 }
706
707 pub fn nulls(&self) -> Option<&NullBuffer> {
708 None
712 }
713 pub fn is_null(&self, index: usize) -> bool {
715 self.nulls().is_some_and(|n| n.is_null(index))
716 }
717
718 pub fn is_valid(&self, index: usize) -> bool {
720 !self.is_null(index)
721 }
722}
723
724impl From<ShreddedVariantFieldArray> for ArrayRef {
725 fn from(array: ShreddedVariantFieldArray) -> Self {
726 Arc::new(array.into_inner())
727 }
728}
729
730impl From<ShreddedVariantFieldArray> for StructArray {
731 fn from(array: ShreddedVariantFieldArray) -> Self {
732 array.into_inner()
733 }
734}
735
736#[derive(Debug, Clone, PartialEq)]
770pub struct ShreddingState {
771 value: Option<BinaryViewArray>,
772 typed_value: Option<ArrayRef>,
773}
774
775impl ShreddingState {
776 pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) -> Self {
791 Self { value, typed_value }
792 }
793
794 pub fn value_field(&self) -> Option<&BinaryViewArray> {
796 self.value.as_ref()
797 }
798
799 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
801 self.typed_value.as_ref()
802 }
803
804 pub fn borrow(&self) -> BorrowedShreddingState<'_> {
806 BorrowedShreddingState {
807 value: self.value_field(),
808 typed_value: self.typed_value_field(),
809 }
810 }
811
812 pub fn slice(&self, offset: usize, length: usize) -> Self {
814 Self {
815 value: self.value.as_ref().map(|v| v.slice(offset, length)),
816 typed_value: self.typed_value.as_ref().map(|tv| tv.slice(offset, length)),
817 }
818 }
819}
820
821#[derive(Clone, Debug)]
824pub struct BorrowedShreddingState<'a> {
825 value: Option<&'a BinaryViewArray>,
826 typed_value: Option<&'a ArrayRef>,
827}
828
829impl<'a> BorrowedShreddingState<'a> {
830 pub fn new(value: Option<&'a BinaryViewArray>, typed_value: Option<&'a ArrayRef>) -> Self {
845 Self { value, typed_value }
846 }
847
848 pub fn value_field(&self) -> Option<&'a BinaryViewArray> {
850 self.value
851 }
852
853 pub fn typed_value_field(&self) -> Option<&'a ArrayRef> {
855 self.typed_value
856 }
857}
858
859impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
860 type Error = ArrowError;
861
862 fn try_from(inner_struct: &'a StructArray) -> Result<Self> {
863 let value = if let Some(value_col) = inner_struct.column_by_name("value") {
865 let Some(binary_view) = value_col.as_binary_view_opt() else {
866 return Err(ArrowError::NotYetImplemented(format!(
867 "VariantArray 'value' field must be BinaryView, got {}",
868 value_col.data_type()
869 )));
870 };
871 Some(binary_view)
872 } else {
873 None
874 };
875 let typed_value = inner_struct.column_by_name("typed_value");
876 Ok(BorrowedShreddingState::new(value, typed_value))
877 }
878}
879
880impl TryFrom<&StructArray> for ShreddingState {
881 type Error = ArrowError;
882
883 fn try_from(inner_struct: &StructArray) -> Result<Self> {
884 Ok(BorrowedShreddingState::try_from(inner_struct)?.into())
885 }
886}
887
888impl From<BorrowedShreddingState<'_>> for ShreddingState {
889 fn from(state: BorrowedShreddingState<'_>) -> Self {
890 ShreddingState {
891 value: state.value_field().cloned(),
892 typed_value: state.typed_value_field().cloned(),
893 }
894 }
895}
896
897#[derive(Debug, Default, Clone)]
901pub(crate) struct StructArrayBuilder {
902 fields: Vec<FieldRef>,
903 arrays: Vec<ArrayRef>,
904 nulls: Option<NullBuffer>,
905}
906
907impl StructArrayBuilder {
908 pub fn new() -> Self {
909 Default::default()
910 }
911
912 pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self {
914 let field = Field::new(field_name, array.data_type().clone(), nullable);
915 self.fields.push(Arc::new(field));
916 self.arrays.push(array);
917 self
918 }
919
920 pub fn with_nulls(mut self, nulls: NullBuffer) -> Self {
922 self.nulls = Some(nulls);
923 self
924 }
925
926 pub fn build(self) -> StructArray {
927 let Self {
928 fields,
929 arrays,
930 nulls,
931 } = self;
932 StructArray::new(Fields::from(fields), arrays, nulls)
933 }
934}
935
936fn typed_value_to_variant<'a>(
938 typed_value: &'a ArrayRef,
939 value: Option<&BinaryViewArray>,
940 index: usize,
941) -> Result<Variant<'a, 'a>> {
942 let data_type = typed_value.data_type();
943 if value.is_some_and(|v| !matches!(data_type, DataType::Struct(_)) && v.is_valid(index)) {
944 panic!("Invalid variant, conflicting value and typed_value");
946 }
947 match data_type {
948 DataType::Null => Ok(Variant::Null),
949 DataType::Boolean => {
950 let boolean_array = typed_value.as_boolean();
951 let value = boolean_array.value(index);
952 Ok(Variant::from(value))
953 }
954 DataType::FixedSizeBinary(16) => {
956 let array = typed_value.as_fixed_size_binary();
957 let value = array.value(index);
958 Ok(Uuid::from_slice(value).unwrap().into()) }
960 DataType::BinaryView => {
961 let array = typed_value.as_binary_view();
962 let value = array.value(index);
963 Ok(Variant::from(value))
964 }
965 DataType::Utf8 => {
966 let array = typed_value.as_string::<i32>();
967 let value = array.value(index);
968 Ok(Variant::from(value))
969 }
970 DataType::LargeUtf8 => {
971 let array = typed_value.as_string::<i64>();
972 let value = array.value(index);
973 Ok(Variant::from(value))
974 }
975 DataType::Utf8View => {
976 let array = typed_value.as_string_view();
977 let value = array.value(index);
978 Ok(Variant::from(value))
979 }
980 DataType::Int8 => {
981 primitive_conversion_single_value!(Int8Type, typed_value, index)
982 }
983 DataType::Int16 => {
984 primitive_conversion_single_value!(Int16Type, typed_value, index)
985 }
986 DataType::Int32 => {
987 primitive_conversion_single_value!(Int32Type, typed_value, index)
988 }
989 DataType::Int64 => {
990 primitive_conversion_single_value!(Int64Type, typed_value, index)
991 }
992 DataType::Float16 => {
993 primitive_conversion_single_value!(Float16Type, typed_value, index)
994 }
995 DataType::Float32 => {
996 primitive_conversion_single_value!(Float32Type, typed_value, index)
997 }
998 DataType::Float64 => {
999 primitive_conversion_single_value!(Float64Type, typed_value, index)
1000 }
1001 DataType::Decimal32(_, s) => {
1002 generic_conversion_single_value_with_result!(
1003 Decimal32Type,
1004 as_primitive,
1005 |v| VariantDecimal4::try_new(v, *s as u8),
1006 typed_value,
1007 index
1008 )
1009 }
1010 DataType::Decimal64(_, s) => {
1011 generic_conversion_single_value_with_result!(
1012 Decimal64Type,
1013 as_primitive,
1014 |v| VariantDecimal8::try_new(v, *s as u8),
1015 typed_value,
1016 index
1017 )
1018 }
1019 DataType::Decimal128(_, s) => {
1020 generic_conversion_single_value_with_result!(
1021 Decimal128Type,
1022 as_primitive,
1023 |v| VariantDecimal16::try_new(v, *s as u8),
1024 typed_value,
1025 index
1026 )
1027 }
1028 DataType::Date32 => {
1029 generic_conversion_single_value!(
1030 Date32Type,
1031 as_primitive,
1032 Date32Type::to_naive_date,
1033 typed_value,
1034 index
1035 )
1036 }
1037 DataType::Time64(TimeUnit::Microsecond) => {
1038 generic_conversion_single_value_with_result!(
1039 Time64MicrosecondType,
1040 as_primitive,
1041 |v| NaiveTime::from_num_seconds_from_midnight_opt(
1042 (v / 1_000_000) as u32,
1043 (v % 1_000_000) as u32 * 1000
1044 )
1045 .ok_or_else(|| format!("Invalid microsecond from midnight: {}", v)),
1046 typed_value,
1047 index
1048 )
1049 }
1050 DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => {
1051 generic_conversion_single_value!(
1052 TimestampMicrosecondType,
1053 as_primitive,
1054 |v| DateTime::from_timestamp_micros(v).unwrap(),
1055 typed_value,
1056 index
1057 )
1058 }
1059 DataType::Timestamp(TimeUnit::Microsecond, None) => {
1060 generic_conversion_single_value!(
1061 TimestampMicrosecondType,
1062 as_primitive,
1063 |v| DateTime::from_timestamp_micros(v).unwrap().naive_utc(),
1064 typed_value,
1065 index
1066 )
1067 }
1068 DataType::Timestamp(TimeUnit::Nanosecond, Some(_)) => {
1069 generic_conversion_single_value!(
1070 TimestampNanosecondType,
1071 as_primitive,
1072 DateTime::from_timestamp_nanos,
1073 typed_value,
1074 index
1075 )
1076 }
1077 DataType::Timestamp(TimeUnit::Nanosecond, None) => {
1078 generic_conversion_single_value!(
1079 TimestampNanosecondType,
1080 as_primitive,
1081 |v| DateTime::from_timestamp_nanos(v).naive_utc(),
1082 typed_value,
1083 index
1084 )
1085 }
1086 _ => {
1089 debug_assert!(
1093 false,
1094 "Unsupported typed_value type: {}",
1095 typed_value.data_type()
1096 );
1097 Ok(Variant::Null)
1098 }
1099 }
1100}
1101
1102fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef> {
1113 let new_type = canonicalize_and_verify_data_type(array.data_type())?;
1114 if let Cow::Borrowed(_) = new_type {
1115 if let Some(array) = array.as_struct_opt() {
1116 return Ok(Arc::new(array.clone())); }
1118 }
1119 cast(array, new_type.as_ref())
1120}
1121
1122fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result<Cow<'_, DataType>> {
1126 use DataType::*;
1127
1128 macro_rules! fail {
1130 () => {
1131 return Err(ArrowError::InvalidArgumentError(format!(
1132 "Illegal shredded value type: {data_type}"
1133 )))
1134 };
1135 }
1136 macro_rules! borrow {
1137 () => {
1138 Cow::Borrowed(data_type)
1139 };
1140 }
1141
1142 let new_data_type = match data_type {
1143 Null | Boolean => borrow!(),
1145 Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => borrow!(),
1146
1147 UInt8 | UInt16 | UInt32 | UInt64 | Float16 => fail!(),
1149
1150 Decimal64(p, s) | Decimal128(p, s)
1155 if VariantDecimal4::is_valid_precision_and_scale(p, s) =>
1156 {
1157 Cow::Owned(Decimal32(*p, *s))
1158 }
1159 Decimal128(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
1160 Cow::Owned(Decimal64(*p, *s))
1161 }
1162 Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => borrow!(),
1163 Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => borrow!(),
1164 Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => borrow!(),
1165 Decimal32(..) | Decimal64(..) | Decimal128(..) | Decimal256(..) => fail!(),
1166
1167 Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) => borrow!(),
1169 Timestamp(TimeUnit::Millisecond | TimeUnit::Second, _) => fail!(),
1170
1171 Date32 | Time64(TimeUnit::Microsecond) => borrow!(),
1173 Date64 | Time32(_) | Time64(_) | Duration(_) | Interval(_) => fail!(),
1174
1175 Binary | LargeBinary => Cow::Owned(BinaryView),
1178 BinaryView | Utf8 | LargeUtf8 | Utf8View => borrow!(),
1179
1180 FixedSizeBinary(16) => borrow!(),
1182 FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
1183
1184 ListView(_) | LargeList(_) | LargeListView(_) => {
1186 fail!()
1187 }
1188
1189 List(field) => match canonicalize_and_verify_field(field)? {
1191 Cow::Borrowed(_) => borrow!(),
1192 Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
1193 },
1194 Struct(fields) => {
1196 let mut new_fields = std::collections::HashMap::new();
1199 for (i, field) in fields.iter().enumerate() {
1200 if let Cow::Owned(new_field) = canonicalize_and_verify_field(field)? {
1201 new_fields.insert(i, new_field);
1202 }
1203 }
1204
1205 if new_fields.is_empty() {
1206 borrow!()
1207 } else {
1208 let new_fields = fields
1209 .iter()
1210 .enumerate()
1211 .map(|(i, field)| new_fields.remove(&i).unwrap_or_else(|| field.clone()));
1212 Cow::Owned(DataType::Struct(new_fields.collect()))
1213 }
1214 }
1215 Map(..) | Union(..) => fail!(),
1216
1217 Dictionary(..) | RunEndEncoded(..) => fail!(),
1219 };
1220 Ok(new_data_type)
1221}
1222
1223fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field>>> {
1224 let Cow::Owned(new_data_type) = canonicalize_and_verify_data_type(field.data_type())? else {
1225 return Ok(Cow::Borrowed(field));
1226 };
1227 let new_field = field.as_ref().clone().with_data_type(new_data_type);
1228 Ok(Cow::Owned(Arc::new(new_field)))
1229}
1230
#[cfg(test)]
mod test {
    use crate::VariantArrayBuilder;
    use std::str::FromStr;

    use super::*;
    use arrow::array::{
        BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array,
        Time64MicrosecondArray,
    };
    use arrow_schema::{Field, Fields};
    use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString};

    /// One-row `BinaryView` column used as a stand-in `metadata` / `value` column.
    fn make_binary_view_array() -> ArrayRef {
        let column = BinaryViewArray::from(vec![b"test" as &[u8]]);
        Arc::new(column)
    }

    /// One-row `Int32` column used to provoke "wrong column type" errors.
    fn make_int32_array() -> ArrayRef {
        let column = Int32Array::from(vec![1]);
        Arc::new(column)
    }

    /// A non-struct input is rejected.
    #[test]
    fn invalid_not_a_struct_array() {
        let not_a_struct = make_binary_view_array();
        let message = VariantArray::try_new(&not_a_struct)
            .unwrap_err()
            .to_string();
        assert_eq!(
            message,
            "Invalid argument error: Invalid VariantArray: requires StructArray as input"
        );
    }

    /// A struct without a `metadata` column is rejected.
    #[test]
    fn invalid_missing_metadata() {
        let schema = Fields::from(vec![Field::new("value", DataType::BinaryView, true)]);
        let storage = StructArray::new(schema, vec![make_binary_view_array()], None);
        let message = VariantArray::try_new(&storage).unwrap_err().to_string();
        assert_eq!(
            message,
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field"
        );
    }

    /// With only a `metadata` column, both shredding columns are absent and every
    /// valid row reads as `Variant::Null`.
    #[test]
    fn all_null_missing_value_and_typed_value() {
        let schema = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let storage = StructArray::new(schema, vec![make_binary_view_array()], None);

        let variant_array = VariantArray::try_new(&storage).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        let valid_rows = (0..variant_array.len()).filter(|&row| variant_array.is_valid(row));
        for row in valid_rows {
            assert_eq!(variant_array.value(row), parquet_variant::Variant::Null);
        }
    }

    /// A `metadata` column of the wrong type is rejected.
    #[test]
    fn invalid_metadata_field_type() {
        let schema = Fields::from(vec![
            Field::new("metadata", DataType::Int32, true),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let columns = vec![make_int32_array(), make_binary_view_array()];
        let storage = StructArray::new(schema, columns, None);

        let message = VariantArray::try_new(&storage).unwrap_err().to_string();
        assert_eq!(
            message,
            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Int32"
        );
    }

    /// A `value` column of the wrong type is rejected.
    #[test]
    fn invalid_value_field_type() {
        let schema = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, true),
            Field::new("value", DataType::Int32, true),
        ]);
        let columns = vec![make_binary_view_array(), make_int32_array()];
        let storage = StructArray::new(schema, columns, None);

        let message = VariantArray::try_new(&storage).unwrap_err().to_string();
        assert_eq!(
            message,
            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Int32"
        );
    }

    /// `ShreddingState::new(None, None)` stores exactly what it was given.
    #[test]
    fn all_null_shredding_state() {
        let state = ShreddingState::new(None, None);
        assert!(matches!(
            state,
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));
    }

    /// Struct-level nulls are reported by `is_valid` even when no shredding columns exist.
    #[test]
    fn all_null_variant_array_construction() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
        let nulls = NullBuffer::from(vec![false, false, false]);
        let schema = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let storage = StructArray::new(schema, vec![Arc::new(metadata)], Some(nulls));

        let variant_array = VariantArray::try_new(&storage).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        assert_eq!(variant_array.len(), 3);
        assert!(!variant_array.is_valid(0));
        assert!(!variant_array.is_valid(1));
        assert!(!variant_array.is_valid(2));

        for i in 0..variant_array.len() {
            assert!(
                !variant_array.is_valid(i),
                "Expected value at index {i} to be null"
            );
        }
    }

    /// A `value` column that is present but entirely null still counts as present.
    #[test]
    fn value_field_present_but_all_null_should_be_unshredded() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);

        // Build a `value` column whose three rows are all null.
        let all_null = NullBuffer::from(vec![false, false, false]);
        let value_data = BinaryViewArray::from_iter_values(vec![""; 3])
            .to_data()
            .into_builder()
            .nulls(Some(all_null))
            .build()
            .unwrap();
        let value = BinaryViewArray::from(value_data);

        let schema = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, false),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let storage = StructArray::new(schema, vec![Arc::new(metadata), Arc::new(value)], None);

        let variant_array = VariantArray::try_new(&storage).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: Some(_),
                typed_value: None
            }
        ));
    }

    /// Forward iteration yields `None` for null rows and `Some` for everything else,
    /// including `Variant::Null`.
    #[test]
    fn test_variant_array_iterable() {
        let mut builder = VariantArrayBuilder::new(6);

        builder.append_null();
        builder.append_variant(Variant::from(1_i8));
        builder.append_variant(Variant::Null);
        builder.append_variant(Variant::from(2_i32));
        builder.append_variant(Variant::from(3_i64));
        builder.append_null();

        let array = builder.build();
        let collected: Vec<_> = array.iter().collect();

        let expected = vec![
            None,
            Some(Variant::Int8(1)),
            Some(Variant::Null),
            Some(Variant::Int32(2)),
            Some(Variant::Int64(3)),
            None,
        ];
        assert_eq!(collected, expected);
    }

    /// Front and back cursors can be interleaved and meet in the middle.
    #[test]
    fn test_variant_array_iter_double_ended() {
        let mut builder = VariantArrayBuilder::new(5);

        builder.append_variant(Variant::from(0_i32));
        builder.append_null();
        builder.append_variant(Variant::from(2_i32));
        builder.append_null();
        builder.append_variant(Variant::from(4_i32));

        let array = builder.build();
        let mut rows = array.iter();

        assert_eq!(rows.next(), Some(Some(Variant::from(0_i32))));
        assert_eq!(rows.next(), Some(None));

        assert_eq!(rows.next_back(), Some(Some(Variant::from(4_i32))));
        assert_eq!(rows.next_back(), Some(None));
        assert_eq!(rows.next_back(), Some(Some(Variant::from(2_i32))));

        assert_eq!(rows.next_back(), None);
        assert_eq!(rows.next(), None);
    }

    /// `rev()` walks the rows back to front.
    #[test]
    fn test_variant_array_iter_reverse() {
        let mut builder = VariantArrayBuilder::new(5);

        builder.append_variant(Variant::from("a"));
        builder.append_null();
        builder.append_variant(Variant::from("aaa"));
        builder.append_null();
        builder.append_variant(Variant::from("aaaaa"));

        let array = builder.build();
        let reversed: Vec<_> = array.iter().rev().collect();

        let expected = vec![
            Some(Variant::from("aaaaa")),
            None,
            Some(Variant::from("aaa")),
            None,
            Some(Variant::from("a")),
        ];
        assert_eq!(reversed, expected);
    }

    /// An empty array yields nothing from either end.
    #[test]
    fn test_variant_array_iter_empty() {
        let empty = VariantArrayBuilder::new(0).build();
        let mut rows = empty.iter();
        assert!(rows.next().is_none());
        assert!(rows.next_back().is_none());
    }

    /// Collecting `Option<Variant>` items keeps `None` as null rows.
    #[test]
    fn test_from_variant_opts_into_variant_array() {
        let items = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse), None];

        let variant_array = VariantArray::from_iter(items);

        assert_eq!(variant_array.len(), 4);

        assert!(variant_array.is_null(0));

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::Null);

        assert!(!variant_array.is_null(2));
        assert_eq!(variant_array.value(2), Variant::BooleanFalse);

        assert!(variant_array.is_null(3));
    }

    /// Collecting plain `Variant` items produces no null rows.
    #[test]
    fn test_from_variants_into_variant_array() {
        let items = vec![
            Variant::Null,
            Variant::BooleanFalse,
            Variant::ShortString(ShortString::try_new("norm").unwrap()),
        ];

        let variant_array = VariantArray::from_iter(items);

        assert_eq!(variant_array.len(), 3);

        assert!(!variant_array.is_null(0));
        assert_eq!(variant_array.value(0), Variant::Null);

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::BooleanFalse);

        assert!(!variant_array.is_null(2));
        assert_eq!(
            variant_array.value(2),
            Variant::ShortString(ShortString::try_new("norm").unwrap())
        );
    }

    /// Equality holds for clones and fails for reordered or sliced arrays.
    #[test]
    fn test_variant_equality() {
        let items = [None, Some(Variant::BooleanFalse), Some(Variant::Null), None];
        let original = VariantArray::from_iter(items.clone());

        {
            let copy = original.clone();
            assert_eq!(original, copy);
        }

        {
            let reversed = VariantArray::from_iter(items.iter().cloned().rev());
            assert_ne!(original, reversed);
        }

        {
            let first_row_only = original.slice(0, 1);
            assert_ne!(original, first_row_only);
        }
    }

    /// Generates a test that puts `$invalid_typed_value` in the `typed_value` column of
    /// a one-row variant array and expects `try_value(0)` to fail with a `CastError`
    /// whose message contains `$error_msg`.
    macro_rules! invalid_variant_array_test {
        ($fn_name: ident, $invalid_typed_value: expr, $error_msg: literal) => {
            #[test]
            fn $fn_name() {
                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
                    EMPTY_VARIANT_METADATA_BYTES,
                    1,
                ));
                let typed_column = $invalid_typed_value;

                let storage = StructArrayBuilder::new()
                    .with_field("metadata", Arc::new(metadata), false)
                    .with_field("typed_value", Arc::new(typed_column), true)
                    .build();

                let array: VariantArray =
                    VariantArray::try_new(&storage).expect("should create variant array");

                let outcome = array.try_value(0);
                assert!(outcome.is_err());
                let error = outcome.unwrap_err();
                assert!(matches!(error, ArrowError::CastError(_)));

                let expected: &str = $error_msg;
                let rendered = error.to_string();
                assert!(
                    rendered.contains(expected),
                    "error `{}` did not contain `{}`",
                    error,
                    expected
                )
            }
        };
    }

    invalid_variant_array_test!(
        test_variant_array_invalide_time,
        Time64MicrosecondArray::from(vec![Some(86401000000)]),
        "Cast error: Cast failed at index 0 (array type: Time64(µs)): Invalid microsecond from midnight: 86401000000"
    );

    invalid_variant_array_test!(
        test_variant_array_invalid_decimal32,
        Decimal32Array::from(vec![Some(1234567890)]),
        "Cast error: Cast failed at index 0 (array type: Decimal32(9, 2)): Invalid argument error: 1234567890 is wider than max precision 9"
    );

    invalid_variant_array_test!(
        test_variant_array_invalid_decimal64,
        Decimal64Array::from(vec![Some(1234567890123456789)]),
        "Cast error: Cast failed at index 0 (array type: Decimal64(18, 6)): Invalid argument error: 1234567890123456789 is wider than max precision 18"
    );

    invalid_variant_array_test!(
        test_variant_array_invalid_decimal128,
        Decimal128Array::from(vec![Some(
            i128::from_str("123456789012345678901234567890123456789").unwrap()
        ),]),
        "Cast error: Cast failed at index 0 (array type: Decimal128(38, 10)): Invalid argument error: 123456789012345678901234567890123456789 is wider than max precision 38"
    );
}