1use crate::type_conversion::CastOptions;
19use arrow::array::{
20 Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
21 GenericStringArray, OffsetSizeTrait, PrimitiveArray,
22};
23use arrow::compute::kernels::cast;
24use arrow::datatypes::{
25 self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
26 DecimalType, RunEndIndexType,
27};
28use arrow::temporal_conversions::{as_date, as_datetime, as_time};
29use arrow_schema::{ArrowError, DataType, TimeUnit};
30use chrono::{DateTime, TimeZone, Utc};
31use parquet_variant::{
32 ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
33 VariantDecimal16, VariantDecimalType,
34};
35use std::collections::HashMap;
36use std::ops::Range;
37
/// Row-oriented converter from a single Arrow array to Variant values.
///
/// There is one variant per supported Arrow data type (or per type/unit
/// combination); each wraps the concrete builder for that type.
/// `append_row` dispatches to the wrapped builder with a `match`.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null and boolean arrays
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    // Signed / unsigned integers and floats
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
    // Decimals; Decimal256 has its own builder because it narrows to i128 first
    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Temporal types, one variant per time unit
    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
    // Binary and string layouts
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // List-like layouts share one builder, generic over `ListLikeArray`
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    // Nested and encoded layouts
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
}
91
92impl<'a> ArrowToVariantRowBuilder<'a> {
93 pub fn append_row(
95 &mut self,
96 builder: &mut impl VariantBuilderExt,
97 index: usize,
98 ) -> Result<(), ArrowError> {
99 use ArrowToVariantRowBuilder::*;
100 match self {
101 Null(b) => b.append_row(builder, index),
102 Boolean(b) => b.append_row(builder, index),
103 PrimitiveInt8(b) => b.append_row(builder, index),
104 PrimitiveInt16(b) => b.append_row(builder, index),
105 PrimitiveInt32(b) => b.append_row(builder, index),
106 PrimitiveInt64(b) => b.append_row(builder, index),
107 PrimitiveUInt8(b) => b.append_row(builder, index),
108 PrimitiveUInt16(b) => b.append_row(builder, index),
109 PrimitiveUInt32(b) => b.append_row(builder, index),
110 PrimitiveUInt64(b) => b.append_row(builder, index),
111 PrimitiveFloat16(b) => b.append_row(builder, index),
112 PrimitiveFloat32(b) => b.append_row(builder, index),
113 PrimitiveFloat64(b) => b.append_row(builder, index),
114 Decimal32(b) => b.append_row(builder, index),
115 Decimal64(b) => b.append_row(builder, index),
116 Decimal128(b) => b.append_row(builder, index),
117 Decimal256(b) => b.append_row(builder, index),
118 TimestampSecond(b) => b.append_row(builder, index),
119 TimestampMillisecond(b) => b.append_row(builder, index),
120 TimestampMicrosecond(b) => b.append_row(builder, index),
121 TimestampNanosecond(b) => b.append_row(builder, index),
122 Date32(b) => b.append_row(builder, index),
123 Date64(b) => b.append_row(builder, index),
124 Time32Second(b) => b.append_row(builder, index),
125 Time32Millisecond(b) => b.append_row(builder, index),
126 Time64Microsecond(b) => b.append_row(builder, index),
127 Time64Nanosecond(b) => b.append_row(builder, index),
128 Binary(b) => b.append_row(builder, index),
129 LargeBinary(b) => b.append_row(builder, index),
130 BinaryView(b) => b.append_row(builder, index),
131 FixedSizeBinary(b) => b.append_row(builder, index),
132 Utf8(b) => b.append_row(builder, index),
133 LargeUtf8(b) => b.append_row(builder, index),
134 Utf8View(b) => b.append_row(builder, index),
135 List(b) => b.append_row(builder, index),
136 LargeList(b) => b.append_row(builder, index),
137 ListView(b) => b.append_row(builder, index),
138 LargeListView(b) => b.append_row(builder, index),
139 FixedSizeList(b) => b.append_row(builder, index),
140 Struct(b) => b.append_row(builder, index),
141 Map(b) => b.append_row(builder, index),
142 Union(b) => b.append_row(builder, index),
143 Dictionary(b) => b.append_row(builder, index),
144 RunEndEncodedInt16(b) => b.append_row(builder, index),
145 RunEndEncodedInt32(b) => b.append_row(builder, index),
146 RunEndEncodedInt64(b) => b.append_row(builder, index),
147 }
148 }
149}
150
151pub(crate) fn make_arrow_to_variant_row_builder<'a>(
153 data_type: &'a DataType,
154 array: &'a dyn Array,
155 options: &'a CastOptions,
156) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
157 use ArrowToVariantRowBuilder::*;
158 let builder =
159 match data_type {
160 DataType::Null => Null(NullArrowToVariantBuilder),
161 DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
162 DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
163 DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
164 DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
165 DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
166 DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
167 DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
168 DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
169 DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
170 DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
171 DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
172 DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
173 DataType::Decimal32(_, scale) => {
174 Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
175 }
176 DataType::Decimal64(_, scale) => {
177 Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
178 }
179 DataType::Decimal128(_, scale) => {
180 Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
181 }
182 DataType::Decimal256(_, scale) => {
183 Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
184 }
185 DataType::Timestamp(time_unit, time_zone) => {
186 match time_unit {
187 TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
188 array,
189 options,
190 time_zone.is_some(),
191 )),
192 TimeUnit::Millisecond => TimestampMillisecond(
193 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
194 ),
195 TimeUnit::Microsecond => TimestampMicrosecond(
196 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
197 ),
198 TimeUnit::Nanosecond => TimestampNanosecond(
199 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
200 ),
201 }
202 }
203 DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
204 DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
205 DataType::Time32(time_unit) => match time_unit {
206 TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
207 TimeUnit::Millisecond => {
208 Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
209 }
210 _ => {
211 return Err(ArrowError::CastError(format!(
212 "Unsupported Time32 unit: {time_unit:?}"
213 )));
214 }
215 },
216 DataType::Time64(time_unit) => match time_unit {
217 TimeUnit::Microsecond => {
218 Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
219 }
220 TimeUnit::Nanosecond => {
221 Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
222 }
223 _ => {
224 return Err(ArrowError::CastError(format!(
225 "Unsupported Time64 unit: {time_unit:?}"
226 )));
227 }
228 },
229 DataType::Duration(_) | DataType::Interval(_) => {
230 return Err(ArrowError::InvalidArgumentError(
231 "Casting duration/interval types to Variant is not supported. \
232 The Variant format does not define duration/interval types."
233 .to_string(),
234 ));
235 }
236 DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
237 DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
238 DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
239 DataType::FixedSizeBinary(_) => {
240 FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
241 }
242 DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
243 DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
244 DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
245 DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
246 DataType::LargeList(_) => {
247 LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
248 }
249 DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
250 array.as_list_view(),
251 options,
252 )?),
253 DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
254 array.as_list_view(),
255 options,
256 )?),
257 DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
258 array.as_fixed_size_list(),
259 options,
260 )?),
261 DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
262 array.as_struct(),
263 options,
264 )?),
265 DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
266 DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
267 DataType::Dictionary(_, _) => {
268 Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
269 }
270 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
271 DataType::Int16 => {
272 RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
273 }
274 DataType::Int32 => {
275 RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
276 }
277 DataType::Int64 => {
278 RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
279 }
280 _ => {
281 return Err(ArrowError::CastError(format!(
282 "Unsupported run ends type: {}",
283 run_ends.data_type()
284 )));
285 }
286 },
287 };
288 Ok(builder)
289}
290
// Generates a row-builder struct for an array type whose elements can be
// read with `array.value(index)`.
//
// Invocation shape:
//
//   define_row_builder!(
//       struct Name<'a, Generic: Bound, ...>      // generics are optional
//       where Path: Bound                         // optional, single clause
//       { field: Type, ... },                     // optional extra fields
//       |array| -> ArrayType { downcast_expr }    // how to downcast `&dyn Array`
//       , |value| -> Option<_> { transform }      // optional value transform
//   );
//
// The generated type has:
//   * `new(array, <extra fields...>)`, which stores the downcast array and
//     the extra fields;
//   * `append_row(&self, builder, index)`, which appends a null for a null
//     slot and otherwise appends the (optionally transformed) value.
//
// When the transform is declared with `-> Option<...>`, a `None` result is
// treated as a failed conversion. It then reads `self.options.strict`, so the
// struct must declare an `options` field in that case.
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            // The downcast, concretely typed array
            array: &$lifetime $array_type,
            // Caller-declared extra fields, if any
            $( $( $field: $field_type, )+ )?
            // Marks the generic parameters as used even when no field mentions them
            _phantom: std::marker::PhantomData<($( $generic, )*)>,
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Bind each extra field to a local reference so the
                    // transform expression can name it directly (for example `*scale`).
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    let value = self.array.value(index);
                    // Optional transform: rebind the raw value under the
                    // caller-chosen name, then shadow `value` with the result.
                    $(
                        let $value = value;
                        let value = $value_transform;
                        // Fallible transform: `None` is an error in strict
                        // mode, otherwise a `Variant::Null` value is appended.
                        $(
                            let Some(value): Option<$option_ty> = value else {
                                if self.options.strict {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
390
// Row builder for Arrow boolean arrays; each non-null value is appended
// without any transform.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
395
// Row builder for primitive arrays whose native type converts into a
// `Variant` via `Into`; each non-null value is appended without any transform.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
401
402define_row_builder!(
403 struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
404 where
405 V: VariantDecimalType<Native = A::Native>,
406 {
407 options: &'a CastOptions,
408 scale: i8,
409 },
410 |array| -> PrimitiveArray<A> { array.as_primitive() },
411 |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
412);
413
414define_row_builder!(
416 struct Decimal256ArrowToVariantBuilder<'a> {
417 options: &'a CastOptions,
418 scale: i8,
419 },
420 |array| -> arrow::array::Decimal256Array { array.as_primitive() },
421 |value| -> Option<_> {
422 let value = value.to_i128();
423 value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
424 }
425);
426
427define_row_builder!(
428 struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
429 options: &'a CastOptions,
430 has_time_zone: bool,
431 },
432 |array| -> PrimitiveArray<T> { array.as_primitive() },
433 |value| -> Option<_> {
434 as_datetime::<T>(value).map(|naive_datetime| {
436 if *has_time_zone {
437 let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
439 Variant::from(utc_dt) } else {
441 Variant::from(naive_datetime) }
444 })
445 }
446);
447
448define_row_builder!(
449 struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
450 where
451 i64: From<T::Native>,
452 {
453 options: &'a CastOptions,
454 },
455 |array| -> PrimitiveArray<T> { array.as_primitive() },
456 |value| -> Option<_> {
457 let date_value = i64::from(value);
458 as_date::<T>(date_value)
459 }
460);
461
462define_row_builder!(
463 struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
464 where
465 i64: From<T::Native>,
466 {
467 options: &'a CastOptions,
468 },
469 |array| -> PrimitiveArray<T> { array.as_primitive() },
470 |value| -> Option<_> {
471 let time_value = i64::from(value);
472 as_time::<T>(time_value)
473 }
474);
475
// Row builder for offset-based binary arrays; `O` selects the offset width
// (instantiated with `i32` and `i64` in `ArrowToVariantRowBuilder`).
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);
480
// Row builder for binary-view arrays; values are appended without any transform.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);
485
// Row builder for fixed-size binary arrays; values are appended without any transform.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
490
// Row builder for offset-based string arrays; `O` selects the offset width
// (instantiated with `i32` and `i64` in `ArrowToVariantRowBuilder`).
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);
495
// Row builder for string-view arrays; values are appended without any transform.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
500
/// Row builder for the Arrow `Null` data type. It holds no array because
/// every row is null.
pub(crate) struct NullArrowToVariantBuilder;

impl NullArrowToVariantBuilder {
    /// Appends a null to `builder`; the row index is ignored.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        _index: usize,
    ) -> Result<(), ArrowError> {
        builder.append_null();
        Ok(())
    }
}
514
515pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
518 list_array: &'a L,
519 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
520}
521
522impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
523 pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
524 let values = array.values();
525 let values_builder =
526 make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
527
528 Ok(Self {
529 list_array: array,
530 values_builder: Box::new(values_builder),
531 })
532 }
533
534 fn append_row(
535 &mut self,
536 builder: &mut impl VariantBuilderExt,
537 index: usize,
538 ) -> Result<(), ArrowError> {
539 if self.list_array.is_null(index) {
540 builder.append_null();
541 return Ok(());
542 }
543
544 let range = self.list_array.element_range(index);
545
546 let mut list_builder = builder.try_new_list()?;
547 for value_index in range {
548 self.values_builder
549 .append_row(&mut list_builder, value_index)?;
550 }
551 list_builder.finish();
552 Ok(())
553 }
554}
555
/// Common view over the list-like array layouts, so that
/// `ListArrowToVariantBuilder` can handle all of them with one implementation.
pub(crate) trait ListLikeArray: Array {
    /// Returns the child values array that holds the elements of every row.
    fn values(&self) -> &dyn Array;

    /// Returns the range of positions in `values()` that belong to row `index`.
    fn element_range(&self, index: usize) -> Range<usize>;
}
564
565impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
566 fn values(&self) -> &dyn Array {
567 self.values()
568 }
569
570 fn element_range(&self, index: usize) -> Range<usize> {
571 let offsets = self.offsets();
572 let start = offsets[index].as_usize();
573 let end = offsets[index + 1].as_usize();
574 start..end
575 }
576}
577
578impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
579 fn values(&self) -> &dyn Array {
580 self.values()
581 }
582
583 fn element_range(&self, index: usize) -> Range<usize> {
584 let offsets = self.value_offsets();
585 let sizes = self.value_sizes();
586 let offset = offsets[index].as_usize();
587 let size = sizes[index].as_usize();
588 offset..(offset + size)
589 }
590}
591
592impl ListLikeArray for FixedSizeListArray {
593 fn values(&self) -> &dyn Array {
594 self.values()
595 }
596
597 fn element_range(&self, index: usize) -> Range<usize> {
598 let value_length = self.value_length().as_usize();
599 let offset = index * value_length;
600 offset..(offset + value_length)
601 }
602}
603
604pub(crate) struct StructArrowToVariantBuilder<'a> {
606 struct_array: &'a arrow::array::StructArray,
607 field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
608}
609
610impl<'a> StructArrowToVariantBuilder<'a> {
611 pub(crate) fn new(
612 struct_array: &'a arrow::array::StructArray,
613 options: &'a CastOptions,
614 ) -> Result<Self, ArrowError> {
615 let mut field_builders = Vec::new();
616
617 for (field_name, field_array) in struct_array
619 .column_names()
620 .iter()
621 .zip(struct_array.columns().iter())
622 {
623 let field_builder = make_arrow_to_variant_row_builder(
624 field_array.data_type(),
625 field_array.as_ref(),
626 options,
627 )?;
628 field_builders.push((*field_name, field_builder));
629 }
630
631 Ok(Self {
632 struct_array,
633 field_builders,
634 })
635 }
636
637 fn append_row(
638 &mut self,
639 builder: &mut impl VariantBuilderExt,
640 index: usize,
641 ) -> Result<(), ArrowError> {
642 if self.struct_array.is_null(index) {
643 builder.append_null();
644 } else {
645 let mut obj_builder = builder.try_new_object()?;
647
648 for (field_name, row_builder) in &mut self.field_builders {
650 let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
651 row_builder.append_row(&mut field_builder, index)?;
652 }
653
654 obj_builder.finish();
655 }
656 Ok(())
657 }
658}
659
660pub(crate) struct MapArrowToVariantBuilder<'a> {
662 map_array: &'a arrow::array::MapArray,
663 key_strings: arrow::array::StringArray,
664 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
665}
666
667impl<'a> MapArrowToVariantBuilder<'a> {
668 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
669 let map_array = array.as_map();
670
671 let keys = cast(map_array.keys(), &DataType::Utf8)?;
673 let key_strings = keys.as_string::<i32>().clone();
674
675 let values = map_array.values();
677 let values_builder =
678 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
679
680 Ok(Self {
681 map_array,
682 key_strings,
683 values_builder: Box::new(values_builder),
684 })
685 }
686
687 fn append_row(
688 &mut self,
689 builder: &mut impl VariantBuilderExt,
690 index: usize,
691 ) -> Result<(), ArrowError> {
692 if self.map_array.is_null(index) {
694 builder.append_null();
695 return Ok(());
696 }
697
698 let offsets = self.map_array.offsets();
699 let start = offsets[index].as_usize();
700 let end = offsets[index + 1].as_usize();
701
702 let mut object_builder = builder.try_new_object()?;
704
705 for kv_index in start..end {
707 let key = self.key_strings.value(kv_index);
708 let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
709 self.values_builder
710 .append_row(&mut field_builder, kv_index)?;
711 }
712
713 object_builder.finish();
714 Ok(())
715 }
716}
717
718pub(crate) struct UnionArrowToVariantBuilder<'a> {
722 union_array: &'a arrow::array::UnionArray,
723 child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
724}
725
726impl<'a> UnionArrowToVariantBuilder<'a> {
727 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
728 let union_array = array.as_union();
729 let type_ids = union_array.type_ids();
730
731 let mut child_builders = HashMap::new();
733 for &type_id in type_ids {
734 let child_array = union_array.child(type_id);
735 let child_builder = make_arrow_to_variant_row_builder(
736 child_array.data_type(),
737 child_array.as_ref(),
738 options,
739 )?;
740 child_builders.insert(type_id, Box::new(child_builder));
741 }
742
743 Ok(Self {
744 union_array,
745 child_builders,
746 })
747 }
748
749 fn append_row(
750 &mut self,
751 builder: &mut impl VariantBuilderExt,
752 index: usize,
753 ) -> Result<(), ArrowError> {
754 let type_id = self.union_array.type_id(index);
755 let value_offset = self.union_array.value_offset(index);
756
757 match self.child_builders.get_mut(&type_id) {
759 Some(child_builder) => child_builder.append_row(builder, value_offset)?,
760 None => builder.append_null(),
761 }
762
763 Ok(())
764 }
765}
766
767pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
769 keys: &'a dyn Array, normalized_keys: Vec<usize>,
771 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
772}
773
774impl<'a> DictionaryArrowToVariantBuilder<'a> {
775 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
776 let dict_array = array.as_any_dictionary();
777 let values = dict_array.values();
778 let values_builder =
779 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
780
781 let normalized_keys = match values.len() {
783 0 => Vec::new(),
784 _ => dict_array.normalized_keys(),
785 };
786
787 Ok(Self {
788 keys: dict_array.keys(),
789 normalized_keys,
790 values_builder: Box::new(values_builder),
791 })
792 }
793
794 fn append_row(
795 &mut self,
796 builder: &mut impl VariantBuilderExt,
797 index: usize,
798 ) -> Result<(), ArrowError> {
799 if self.keys.is_null(index) {
800 builder.append_null();
801 } else {
802 let normalized_key = self.normalized_keys[index];
803 self.values_builder.append_row(builder, normalized_key)?;
804 }
805 Ok(())
806 }
807}
808
809pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
811 run_array: &'a arrow::array::RunArray<R>,
812 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
813
814 run_ends: &'a [R::Native],
815 run_number: usize, run_start: usize, }
818
819impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
820 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
821 let Some(run_array) = array.as_run_opt() else {
822 return Err(ArrowError::CastError("Expected RunArray".to_string()));
823 };
824
825 let values = run_array.values();
826 let values_builder =
827 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
828
829 Ok(Self {
830 run_array,
831 values_builder: Box::new(values_builder),
832 run_ends: run_array.run_ends().values(),
833 run_number: 0,
834 run_start: 0,
835 })
836 }
837
838 fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
839 if index >= self.run_start {
840 let Some(run_end) = self.run_ends.get(self.run_number) else {
841 return Err(ArrowError::CastError(format!(
842 "Index {index} beyond run array"
843 )));
844 };
845 if index < run_end.as_usize() {
846 return Ok(());
847 }
848 if index == run_end.as_usize() {
849 self.run_number += 1;
850 self.run_start = run_end.as_usize();
851 return Ok(());
852 }
853 }
854
855 let run_number = self
857 .run_ends
858 .partition_point(|&run_end| run_end.as_usize() <= index);
859 if run_number >= self.run_ends.len() {
860 return Err(ArrowError::CastError(format!(
861 "Index {index} beyond run array"
862 )));
863 }
864 self.run_number = run_number;
865 self.run_start = match run_number {
866 0 => 0,
867 _ => self.run_ends[run_number - 1].as_usize(),
868 };
869 Ok(())
870 }
871
872 fn append_row(
873 &mut self,
874 builder: &mut impl VariantBuilderExt,
875 index: usize,
876 ) -> Result<(), ArrowError> {
877 self.set_run_for_index(index)?;
878
879 if self.run_array.values().is_null(self.run_number) {
881 builder.append_null();
882 return Ok(());
883 }
884
885 self.values_builder.append_row(builder, self.run_number)?;
887
888 Ok(())
889 }
890}
891
892#[cfg(test)]
893mod tests {
894 use super::*;
895 use crate::{VariantArray, VariantArrayBuilder};
896 use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
897 use arrow::datatypes::Int32Type;
898 use std::sync::Arc;
899
900 fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
902 execute_row_builder_test_with_options(array, CastOptions::default())
903 }
904
905 fn execute_row_builder_test_with_options(
907 array: &dyn Array,
908 options: CastOptions,
909 ) -> VariantArray {
910 let mut row_builder =
911 make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
912
913 let mut array_builder = VariantArrayBuilder::new(array.len());
914
915 for i in 0..array.len() {
917 row_builder.append_row(&mut array_builder, i).unwrap();
918 }
919
920 let variant_array = array_builder.build();
921 assert_eq!(variant_array.len(), array.len());
922 variant_array
923 }
924
925 fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
928 test_row_builder_basic_with_options(array, expected_values, CastOptions::default());
929 }
930
931 fn test_row_builder_basic_with_options(
933 array: &dyn Array,
934 expected_values: Vec<Option<Variant>>,
935 options: CastOptions,
936 ) {
937 let variant_array = execute_row_builder_test_with_options(array, options);
938
939 for (i, expected) in expected_values.iter().enumerate() {
941 match expected {
942 Some(variant) => {
943 assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
944 }
945 None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
946 }
947 }
948 }
949
950 #[test]
951 fn test_primitive_row_builder() {
952 let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
953 test_row_builder_basic(
954 &int_array,
955 vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
956 );
957 }
958
959 #[test]
960 fn test_string_row_builder() {
961 let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
962 test_row_builder_basic(
963 &string_array,
964 vec![
965 Some(Variant::from("hello")),
966 None,
967 Some(Variant::from("world")),
968 ],
969 );
970 }
971
972 #[test]
973 fn test_boolean_row_builder() {
974 let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
975 test_row_builder_basic(
976 &bool_array,
977 vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
978 );
979 }
980
981 #[test]
982 fn test_struct_row_builder() {
983 use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
984 use arrow_schema::{DataType, Field};
985 use std::sync::Arc;
986
987 let int_field = Field::new("id", DataType::Int32, true);
989 let string_field = Field::new("name", DataType::Utf8, true);
990
991 let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
992 let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
993
994 let struct_array = StructArray::try_new(
995 vec![int_field, string_field].into(),
996 vec![
997 Arc::new(int_array) as ArrayRef,
998 Arc::new(string_array) as ArrayRef,
999 ],
1000 None,
1001 )
1002 .unwrap();
1003
1004 let variant_array = execute_row_builder_test(&struct_array);
1005
1006 let first_variant = variant_array.value(0);
1008 assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1009 assert_eq!(
1010 first_variant.get_object_field("name"),
1011 Some(Variant::from("Alice"))
1012 );
1013
1014 let second_variant = variant_array.value(1);
1016 assert_eq!(second_variant.get_object_field("id"), None); assert_eq!(
1018 second_variant.get_object_field("name"),
1019 Some(Variant::from("Bob"))
1020 );
1021
1022 let third_variant = variant_array.value(2);
1024 assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
1025 assert_eq!(third_variant.get_object_field("name"), None); }
1027
1028 #[test]
1029 fn test_run_end_encoded_row_builder() {
1030 use arrow::array::{Int32Array, RunArray};
1031 use arrow::datatypes::Int32Type;
1032
1033 let values = StringArray::from(vec!["A", "B", "C"]);
1037 let run_ends = Int32Array::from(vec![2, 5, 6]);
1038 let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1039
1040 let variant_array = execute_row_builder_test(&run_array);
1041
1042 assert_eq!(variant_array.value(0), Variant::from("A")); assert_eq!(variant_array.value(1), Variant::from("A")); assert_eq!(variant_array.value(2), Variant::from("B")); assert_eq!(variant_array.value(3), Variant::from("B")); assert_eq!(variant_array.value(4), Variant::from("B")); assert_eq!(variant_array.value(5), Variant::from("C")); }
1050
1051 #[test]
1052 fn test_run_end_encoded_random_access() {
1053 use arrow::array::{Int32Array, RunArray};
1054 use arrow::datatypes::Int32Type;
1055
1056 let values = StringArray::from(vec!["A", "B", "C"]);
1058 let run_ends = Int32Array::from(vec![2, 5, 6]);
1059 let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1060
1061 let options = CastOptions::default();
1062 let mut row_builder =
1063 make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1064
1065 let access_pattern = [0, 5, 2, 4, 1, 3]; let expected_values = ["A", "C", "B", "B", "A", "B"];
1068
1069 for (i, &index) in access_pattern.iter().enumerate() {
1070 let mut array_builder = VariantArrayBuilder::new(1);
1071 row_builder.append_row(&mut array_builder, index).unwrap();
1072 let variant_array = array_builder.build();
1073 assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1074 }
1075 }
1076
1077 #[test]
1078 fn test_run_end_encoded_with_nulls() {
1079 use arrow::array::{Int32Array, RunArray};
1080 use arrow::datatypes::Int32Type;
1081
1082 let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1084 let run_ends = Int32Array::from(vec![2, 4, 5]);
1085 let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1086
1087 let options = CastOptions::default();
1088 let mut row_builder =
1089 make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1090 let mut array_builder = VariantArrayBuilder::new(5);
1091
1092 for i in 0..5 {
1094 row_builder.append_row(&mut array_builder, i).unwrap();
1095 }
1096
1097 let variant_array = array_builder.build();
1098 assert_eq!(variant_array.len(), 5);
1099
1100 assert_eq!(variant_array.value(0), Variant::from("A")); assert_eq!(variant_array.value(1), Variant::from("A")); assert!(variant_array.is_null(2)); assert!(variant_array.is_null(3)); assert_eq!(variant_array.value(4), Variant::from("B")); }
1107
1108 #[test]
1109 fn test_dictionary_row_builder() {
1110 use arrow::array::{DictionaryArray, Int32Array};
1111 use arrow::datatypes::Int32Type;
1112
1113 let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1115 let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1116 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1117
1118 let variant_array = execute_row_builder_test(&dict_array);
1119
1120 assert_eq!(variant_array.value(0), Variant::from("apple")); assert_eq!(variant_array.value(1), Variant::from("banana")); assert_eq!(variant_array.value(2), Variant::from("apple")); assert_eq!(variant_array.value(3), Variant::from("cherry")); assert_eq!(variant_array.value(4), Variant::from("banana")); }
1127
1128 #[test]
1129 fn test_dictionary_with_nulls() {
1130 use arrow::array::{DictionaryArray, Int32Array};
1131 use arrow::datatypes::Int32Type;
1132
1133 let values = StringArray::from(vec!["x", "y", "z"]);
1135 let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1136 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1137
1138 let options = CastOptions::default();
1139 let mut row_builder =
1140 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1141 .unwrap();
1142 let mut array_builder = VariantArrayBuilder::new(5);
1143
1144 for i in 0..5 {
1146 row_builder.append_row(&mut array_builder, i).unwrap();
1147 }
1148
1149 let variant_array = array_builder.build();
1150 assert_eq!(variant_array.len(), 5);
1151
1152 assert_eq!(variant_array.value(0), Variant::from("x")); assert!(variant_array.is_null(1)); assert_eq!(variant_array.value(2), Variant::from("y")); assert!(variant_array.is_null(3)); assert_eq!(variant_array.value(4), Variant::from("z")); }
1159
1160 #[test]
1161 fn test_dictionary_random_access() {
1162 use arrow::array::{DictionaryArray, Int32Array};
1163 use arrow::datatypes::Int32Type;
1164
1165 let values = StringArray::from(vec!["red", "green", "blue"]);
1167 let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1168 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1169
1170 let options = CastOptions::default();
1171 let mut row_builder =
1172 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1173 .unwrap();
1174
1175 let access_pattern = [5, 0, 3, 1, 4, 2]; let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1178
1179 for (i, &index) in access_pattern.iter().enumerate() {
1180 let mut array_builder = VariantArrayBuilder::new(1);
1181 row_builder.append_row(&mut array_builder, index).unwrap();
1182 let variant_array = array_builder.build();
1183 assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1184 }
1185 }
1186
1187 #[test]
1188 fn test_nested_dictionary() {
1189 use arrow::array::{DictionaryArray, Int32Array, StructArray};
1190 use arrow::datatypes::{Field, Int32Type};
1191
1192 let id_array = Int32Array::from(vec![1, 2, 3]);
1194 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1195 let struct_array = StructArray::from(vec![
1196 (
1197 Arc::new(Field::new("id", DataType::Int32, false)),
1198 Arc::new(id_array) as ArrayRef,
1199 ),
1200 (
1201 Arc::new(Field::new("name", DataType::Utf8, false)),
1202 Arc::new(name_array) as ArrayRef,
1203 ),
1204 ]);
1205
1206 let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1207 let dict_array =
1208 DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1209
1210 let options = CastOptions::default();
1211 let mut row_builder =
1212 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1213 .unwrap();
1214 let mut array_builder = VariantArrayBuilder::new(5);
1215
1216 for i in 0..5 {
1218 row_builder.append_row(&mut array_builder, i).unwrap();
1219 }
1220
1221 let variant_array = array_builder.build();
1222 assert_eq!(variant_array.len(), 5);
1223
1224 let first_variant = variant_array.value(0);
1226 assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1227 assert_eq!(
1228 first_variant.get_object_field("name"),
1229 Some(Variant::from("Alice"))
1230 );
1231
1232 let second_variant = variant_array.value(1);
1233 assert_eq!(
1234 second_variant.get_object_field("id"),
1235 Some(Variant::from(2))
1236 );
1237 assert_eq!(
1238 second_variant.get_object_field("name"),
1239 Some(Variant::from("Bob"))
1240 );
1241
1242 let third_variant = variant_array.value(2);
1244 assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1245 assert_eq!(
1246 third_variant.get_object_field("name"),
1247 Some(Variant::from("Alice"))
1248 );
1249 }
1250
1251 #[test]
1252 fn test_list_row_builder() {
1253 use arrow::array::ListArray;
1254
1255 let data = vec![
1257 Some(vec![Some(1), Some(2)]),
1258 Some(vec![Some(3), Some(4), Some(5)]),
1259 None,
1260 Some(vec![]),
1261 ];
1262 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1263
1264 let variant_array = execute_row_builder_test(&list_array);
1265
1266 let row0 = variant_array.value(0);
1268 let list0 = row0.as_list().unwrap();
1269 assert_eq!(list0.len(), 2);
1270 assert_eq!(list0.get(0), Some(Variant::from(1)));
1271 assert_eq!(list0.get(1), Some(Variant::from(2)));
1272
1273 let row1 = variant_array.value(1);
1275 let list1 = row1.as_list().unwrap();
1276 assert_eq!(list1.len(), 3);
1277 assert_eq!(list1.get(0), Some(Variant::from(3)));
1278 assert_eq!(list1.get(1), Some(Variant::from(4)));
1279 assert_eq!(list1.get(2), Some(Variant::from(5)));
1280
1281 assert!(variant_array.is_null(2));
1283
1284 let row3 = variant_array.value(3);
1286 let list3 = row3.as_list().unwrap();
1287 assert_eq!(list3.len(), 0);
1288 }
1289
1290 #[test]
1291 fn test_sliced_list_row_builder() {
1292 use arrow::array::ListArray;
1293
1294 let data = vec![
1296 Some(vec![Some(1), Some(2)]),
1297 Some(vec![Some(3), Some(4), Some(5)]),
1298 Some(vec![Some(6)]),
1299 ];
1300 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1301
1302 let sliced_array = list_array.slice(1, 1);
1304
1305 let options = CastOptions::default();
1306 let mut row_builder =
1307 make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1308 .unwrap();
1309 let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1310
1311 row_builder
1313 .append_row(&mut variant_array_builder, 0)
1314 .unwrap();
1315 let variant_array = variant_array_builder.build();
1316
1317 assert_eq!(variant_array.len(), 1);
1319
1320 let row0 = variant_array.value(0);
1322 let list0 = row0.as_list().unwrap();
1323 assert_eq!(list0.len(), 3);
1324 assert_eq!(list0.get(0), Some(Variant::from(3)));
1325 assert_eq!(list0.get(1), Some(Variant::from(4)));
1326 assert_eq!(list0.get(2), Some(Variant::from(5)));
1327 }
1328
1329 #[test]
1330 fn test_nested_list_row_builder() {
1331 use arrow::array::ListArray;
1332 use arrow::datatypes::Field;
1333
1334 let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1336 let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1337
1338 let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1339 let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1340
1341 let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1342 let outer_list = ListArray::new(
1343 inner_list_field,
1344 outer_offsets,
1345 Arc::new(values_list),
1346 Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1347 );
1348
1349 let options = CastOptions::default();
1350 let mut row_builder =
1351 make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1352 .unwrap();
1353 let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1354
1355 for i in 0..outer_list.len() {
1356 row_builder
1357 .append_row(&mut variant_array_builder, i)
1358 .unwrap();
1359 }
1360
1361 let variant_array = variant_array_builder.build();
1362
1363 assert_eq!(variant_array.len(), 2);
1365
1366 let row0 = variant_array.value(0);
1368 let outer_list0 = row0.as_list().unwrap();
1369 assert_eq!(outer_list0.len(), 2);
1370
1371 let inner_list0_0 = outer_list0.get(0).unwrap();
1372 let inner_list0_0 = inner_list0_0.as_list().unwrap();
1373 assert_eq!(inner_list0_0.len(), 2);
1374 assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1375 assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1376
1377 let inner_list0_1 = outer_list0.get(1).unwrap();
1378 let inner_list0_1 = inner_list0_1.as_list().unwrap();
1379 assert_eq!(inner_list0_1.len(), 1);
1380 assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1381
1382 assert!(variant_array.is_null(1));
1384 }
1385
1386 #[test]
1387 fn test_map_row_builder() {
1388 use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1389 use arrow::buffer::{NullBuffer, OffsetBuffer};
1390 use arrow::datatypes::{DataType, Field, Fields};
1391 use std::sync::Arc;
1392
1393 let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1395 let values = Int32Array::from(vec![1, 2, 3]);
1396 let entries_fields = Fields::from(vec![
1397 Field::new("key", DataType::Utf8, false),
1398 Field::new("value", DataType::Int32, true),
1399 ]);
1400 let entries = StructArray::new(
1401 entries_fields.clone(),
1402 vec![Arc::new(keys), Arc::new(values)],
1403 None, );
1405
1406 let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1412
1413 let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1415
1416 let map_field = Arc::new(Field::new(
1418 "entries",
1419 DataType::Struct(entries_fields),
1420 false, ));
1422
1423 let map_array = MapArray::try_new(
1425 map_field,
1426 offsets,
1427 entries,
1428 null_buffer,
1429 false, )
1431 .unwrap();
1432
1433 let variant_array = execute_row_builder_test(&map_array);
1434
1435 let map0 = variant_array.value(0);
1437 let obj0 = map0.as_object().unwrap();
1438 assert_eq!(obj0.len(), 1);
1439 assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1440
1441 let map1 = variant_array.value(1);
1443 let obj1 = map1.as_object().unwrap();
1444 assert_eq!(obj1.len(), 0); assert!(variant_array.is_null(2));
1448
1449 let map3 = variant_array.value(3);
1451 let obj3 = map3.as_object().unwrap();
1452 assert_eq!(obj3.len(), 2);
1453 assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1454 assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1455 }
1456
1457 #[test]
1458 fn test_union_sparse_row_builder() {
1459 use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1460 use arrow::buffer::ScalarBuffer;
1461 use arrow::datatypes::{DataType, Field, UnionFields};
1462 use std::sync::Arc;
1463
1464 let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1466 let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1467 let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1468 let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1469
1470 let union_fields = UnionFields::new(
1471 vec![0, 1, 2],
1472 vec![
1473 Field::new("int_field", DataType::Int32, false),
1474 Field::new("float_field", DataType::Float64, false),
1475 Field::new("string_field", DataType::Utf8, false),
1476 ],
1477 );
1478
1479 let children: Vec<Arc<dyn Array>> = vec![
1480 Arc::new(int_array),
1481 Arc::new(float_array),
1482 Arc::new(string_array),
1483 ];
1484
1485 let union_array = UnionArray::try_new(
1486 union_fields,
1487 type_ids,
1488 None, children,
1490 )
1491 .unwrap();
1492
1493 let variant_array = execute_row_builder_test(&union_array);
1494 assert_eq!(variant_array.value(0), Variant::Int32(1));
1495 assert_eq!(variant_array.value(1), Variant::Double(3.2));
1496 assert_eq!(variant_array.value(2), Variant::from("hello"));
1497 assert_eq!(variant_array.value(3), Variant::Double(32.5));
1498 assert_eq!(variant_array.value(4), Variant::Int32(34));
1499 assert!(variant_array.is_null(5));
1500 }
1501
1502 #[test]
1503 fn test_union_dense_row_builder() {
1504 use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1505 use arrow::buffer::ScalarBuffer;
1506 use arrow::datatypes::{DataType, Field, UnionFields};
1507 use std::sync::Arc;
1508
1509 let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1511 let float_array = Float64Array::from(vec![3.2, 32.5]);
1512 let string_array = StringArray::from(vec!["hello"]);
1513 let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1514 let offsets = [0, 0, 0, 1, 1, 2]
1515 .into_iter()
1516 .collect::<ScalarBuffer<i32>>();
1517
1518 let union_fields = UnionFields::new(
1519 vec![0, 1, 2],
1520 vec![
1521 Field::new("int_field", DataType::Int32, false),
1522 Field::new("float_field", DataType::Float64, false),
1523 Field::new("string_field", DataType::Utf8, false),
1524 ],
1525 );
1526
1527 let children: Vec<Arc<dyn Array>> = vec![
1528 Arc::new(int_array),
1529 Arc::new(float_array),
1530 Arc::new(string_array),
1531 ];
1532
1533 let union_array = UnionArray::try_new(
1534 union_fields,
1535 type_ids,
1536 Some(offsets), children,
1538 )
1539 .unwrap();
1540
1541 let options = CastOptions::default();
1543 let mut row_builder =
1544 make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1545 .unwrap();
1546
1547 let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1548 for i in 0..union_array.len() {
1549 row_builder.append_row(&mut variant_builder, i).unwrap();
1550 }
1551 let variant_array = variant_builder.build();
1552
1553 assert_eq!(variant_array.len(), 6);
1554 assert_eq!(variant_array.value(0), Variant::Int32(1));
1555 assert_eq!(variant_array.value(1), Variant::Double(3.2));
1556 assert_eq!(variant_array.value(2), Variant::from("hello"));
1557 assert_eq!(variant_array.value(3), Variant::Double(32.5));
1558 assert_eq!(variant_array.value(4), Variant::Int32(34));
1559 assert!(variant_array.is_null(5));
1560 }
1561
1562 #[test]
1563 fn test_union_sparse_type_ids_row_builder() {
1564 use arrow::array::{Int32Array, StringArray, UnionArray};
1565 use arrow::buffer::ScalarBuffer;
1566 use arrow::datatypes::{DataType, Field, UnionFields};
1567 use std::sync::Arc;
1568
1569 let int_array = Int32Array::from(vec![Some(42), None]);
1571 let string_array = StringArray::from(vec![None, Some("test")]);
1572 let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1573
1574 let union_fields = UnionFields::new(
1575 vec![1, 3], vec![
1577 Field::new("int_field", DataType::Int32, false),
1578 Field::new("string_field", DataType::Utf8, false),
1579 ],
1580 );
1581
1582 let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1583
1584 let union_array = UnionArray::try_new(
1585 union_fields,
1586 type_ids,
1587 None, children,
1589 )
1590 .unwrap();
1591
1592 let options = CastOptions::default();
1594 let mut row_builder =
1595 make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1596 .unwrap();
1597
1598 let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1599 for i in 0..union_array.len() {
1600 row_builder.append_row(&mut variant_builder, i).unwrap();
1601 }
1602 let variant_array = variant_builder.build();
1603
1604 assert_eq!(variant_array.len(), 2);
1606
1607 assert_eq!(variant_array.value(0), Variant::Int32(42));
1609
1610 assert_eq!(variant_array.value(1), Variant::from("test"));
1612 }
1613
1614 #[test]
1615 fn test_decimal32_row_builder() {
1616 use arrow::array::Decimal32Array;
1617 use parquet_variant::VariantDecimal4;
1618
1619 let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1621 .with_precision_and_scale(9, 2)
1622 .unwrap();
1623
1624 test_row_builder_basic(
1625 &decimal_array,
1626 vec![
1627 Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1628 None,
1629 Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1630 ],
1631 );
1632 }
1633
1634 #[test]
1635 fn test_decimal128_row_builder() {
1636 use arrow::array::Decimal128Array;
1637 use parquet_variant::VariantDecimal16;
1638
1639 let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1641 .with_precision_and_scale(10, -2)
1642 .unwrap();
1643
1644 test_row_builder_basic(
1645 &decimal_array,
1646 vec![
1647 Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1648 None,
1649 Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1650 ],
1651 );
1652 }
1653
1654 #[test]
1655 fn test_decimal256_overflow_row_builder() {
1656 use arrow::array::Decimal256Array;
1657 use arrow::datatypes::i256;
1658
1659 let large_value = i256::from_i128(i128::MAX) + i256::from(1); let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1662 .with_precision_and_scale(76, 3)
1663 .unwrap();
1664
1665 test_row_builder_basic_with_options(
1666 &decimal_array,
1667 vec![
1668 Some(Variant::Null), Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1670 ],
1671 CastOptions { strict: false },
1672 );
1673 }
1674
1675 #[test]
1676 fn test_binary_row_builder() {
1677 use arrow::array::BinaryArray;
1678
1679 let binary_data = vec![
1680 Some(b"hello".as_slice()),
1681 None,
1682 Some(b"\x00\x01\x02\xFF".as_slice()),
1683 Some(b"".as_slice()), ];
1685 let binary_array = BinaryArray::from(binary_data);
1686
1687 test_row_builder_basic(
1688 &binary_array,
1689 vec![
1690 Some(Variant::from(b"hello".as_slice())),
1691 None,
1692 Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1693 Some(Variant::from([].as_slice())),
1694 ],
1695 );
1696 }
1697
1698 #[test]
1699 fn test_binary_view_row_builder() {
1700 use arrow::array::BinaryViewArray;
1701
1702 let binary_data = vec![
1703 Some(b"short".as_slice()),
1704 None,
1705 Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1706 ];
1707 let binary_view_array = BinaryViewArray::from(binary_data);
1708
1709 test_row_builder_basic(
1710 &binary_view_array,
1711 vec![
1712 Some(Variant::from(b"short".as_slice())),
1713 None,
1714 Some(Variant::from(
1715 b"this is a longer binary view that exceeds inline storage".as_slice(),
1716 )),
1717 ],
1718 );
1719 }
1720
1721 #[test]
1722 fn test_fixed_size_binary_row_builder() {
1723 use arrow::array::FixedSizeBinaryArray;
1724
1725 let binary_data = vec![
1726 Some([0x01, 0x02, 0x03, 0x04]),
1727 None,
1728 Some([0xFF, 0xFE, 0xFD, 0xFC]),
1729 ];
1730 let fixed_binary_array =
1731 FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1732 .unwrap();
1733
1734 test_row_builder_basic(
1735 &fixed_binary_array,
1736 vec![
1737 Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1738 None,
1739 Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1740 ],
1741 );
1742 }
1743
1744 #[test]
1745 fn test_utf8_view_row_builder() {
1746 use arrow::array::StringViewArray;
1747
1748 let string_data = vec![
1749 Some("short"),
1750 None,
1751 Some("this is a much longer string that will be stored out-of-line in the buffer"),
1752 ];
1753 let string_view_array = StringViewArray::from(string_data);
1754
1755 test_row_builder_basic(
1756 &string_view_array,
1757 vec![
1758 Some(Variant::from("short")),
1759 None,
1760 Some(Variant::from(
1761 "this is a much longer string that will be stored out-of-line in the buffer",
1762 )),
1763 ],
1764 );
1765 }
1766
1767 #[test]
1768 fn test_timestamp_second_row_builder() {
1769 use arrow::array::TimestampSecondArray;
1770
1771 let timestamp_data = vec![
1772 Some(1609459200), None,
1774 Some(1640995200), ];
1776 let timestamp_array = TimestampSecondArray::from(timestamp_data);
1777
1778 let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1779 let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1780
1781 test_row_builder_basic(
1782 ×tamp_array,
1783 vec![
1784 Some(Variant::from(expected_naive1)),
1785 None,
1786 Some(Variant::from(expected_naive2)),
1787 ],
1788 );
1789 }
1790
1791 #[test]
1792 fn test_timestamp_with_timezone_row_builder() {
1793 use arrow::array::TimestampMicrosecondArray;
1794 use chrono::DateTime;
1795
1796 let timestamp_data = vec![
1797 Some(1609459200000000), None,
1799 Some(1640995200000000), ];
1801 let timezone = "UTC".to_string();
1802 let timestamp_array =
1803 TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1804
1805 let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1806 let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1807
1808 test_row_builder_basic(
1809 ×tamp_array,
1810 vec![
1811 Some(Variant::from(expected_utc1)),
1812 None,
1813 Some(Variant::from(expected_utc2)),
1814 ],
1815 );
1816 }
1817
1818 #[test]
1819 fn test_timestamp_nanosecond_precision_row_builder() {
1820 use arrow::array::TimestampNanosecondArray;
1821
1822 let timestamp_data = vec![
1823 Some(1609459200123456789), None,
1825 Some(1609459200000000000), ];
1827 let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1828
1829 let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1830 .unwrap()
1831 .naive_utc();
1832 let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1833
1834 test_row_builder_basic(
1835 ×tamp_array,
1836 vec![
1837 Some(Variant::from(expected_with_nanos)),
1838 None,
1839 Some(Variant::from(expected_no_nanos)),
1840 ],
1841 );
1842 }
1843
1844 #[test]
1845 fn test_timestamp_millisecond_row_builder() {
1846 use arrow::array::TimestampMillisecondArray;
1847
1848 let timestamp_data = vec![
1849 Some(1609459200123), None,
1851 Some(1609459200000), ];
1853 let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1854
1855 let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1856 .unwrap()
1857 .naive_utc();
1858 let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1859
1860 test_row_builder_basic(
1861 ×tamp_array,
1862 vec![
1863 Some(Variant::from(expected_with_millis)),
1864 None,
1865 Some(Variant::from(expected_no_millis)),
1866 ],
1867 );
1868 }
1869
1870 #[test]
1871 fn test_date32_row_builder() {
1872 use arrow::array::Date32Array;
1873 use chrono::NaiveDate;
1874
1875 let date_data = vec![
1876 Some(0), None,
1878 Some(19723), Some(-719162), ];
1881 let date_array = Date32Array::from(date_data);
1882
1883 let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1884 let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1885 let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1886
1887 test_row_builder_basic(
1888 &date_array,
1889 vec![
1890 Some(Variant::from(expected_epoch)),
1891 None,
1892 Some(Variant::from(expected_2024)),
1893 Some(Variant::from(expected_min)),
1894 ],
1895 );
1896 }
1897
1898 #[test]
1899 fn test_date64_row_builder() {
1900 use arrow::array::Date64Array;
1901 use chrono::NaiveDate;
1902
1903 let date_data = vec![
1905 Some(0), None,
1907 Some(1704067200000), Some(86400000), ];
1910 let date_array = Date64Array::from(date_data);
1911
1912 let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1913 let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1914 let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1915
1916 test_row_builder_basic(
1917 &date_array,
1918 vec![
1919 Some(Variant::from(expected_epoch)),
1920 None,
1921 Some(Variant::from(expected_2024)),
1922 Some(Variant::from(expected_next_day)),
1923 ],
1924 );
1925 }
1926
1927 #[test]
1928 fn test_time32_second_row_builder() {
1929 use arrow::array::Time32SecondArray;
1930 use chrono::NaiveTime;
1931
1932 let time_data = vec![
1934 Some(0), None,
1936 Some(3661), Some(86399), ];
1939 let time_array = Time32SecondArray::from(time_data);
1940
1941 let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1942 let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1943 let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1944
1945 test_row_builder_basic(
1946 &time_array,
1947 vec![
1948 Some(Variant::from(expected_midnight)),
1949 None,
1950 Some(Variant::from(expected_time)),
1951 Some(Variant::from(expected_last)),
1952 ],
1953 );
1954 }
1955
1956 #[test]
1957 fn test_time32_millisecond_row_builder() {
1958 use arrow::array::Time32MillisecondArray;
1959 use chrono::NaiveTime;
1960
1961 let time_data = vec![
1963 Some(0), None,
1965 Some(3661123), Some(86399999), ];
1968 let time_array = Time32MillisecondArray::from(time_data);
1969
1970 let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
1971 let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
1972 let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
1973
1974 test_row_builder_basic(
1975 &time_array,
1976 vec![
1977 Some(Variant::from(expected_midnight)),
1978 None,
1979 Some(Variant::from(expected_time)),
1980 Some(Variant::from(expected_last)),
1981 ],
1982 );
1983 }
1984
1985 #[test]
1986 fn test_time64_microsecond_row_builder() {
1987 use arrow::array::Time64MicrosecondArray;
1988 use chrono::NaiveTime;
1989
1990 let time_data = vec![
1992 Some(0), None,
1994 Some(3661123456), Some(86399999999), ];
1997 let time_array = Time64MicrosecondArray::from(time_data);
1998
1999 let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
2000 let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2001 let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2002
2003 test_row_builder_basic(
2004 &time_array,
2005 vec![
2006 Some(Variant::from(expected_midnight)),
2007 None,
2008 Some(Variant::from(expected_time)),
2009 Some(Variant::from(expected_last)),
2010 ],
2011 );
2012 }
2013
2014 #[test]
2015 fn test_time64_nanosecond_row_builder() {
2016 use arrow::array::Time64NanosecondArray;
2017 use chrono::NaiveTime;
2018
2019 let time_data = vec![
2021 Some(0), None,
2023 Some(3661123456789), Some(86399999999999), ];
2026 let time_array = Time64NanosecondArray::from(time_data);
2027
2028 let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2029 let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2031 let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2032
2033 test_row_builder_basic(
2034 &time_array,
2035 vec![
2036 Some(Variant::from(expected_midnight)),
2037 None,
2038 Some(Variant::from(expected_time)),
2039 Some(Variant::from(expected_last)),
2040 ],
2041 );
2042 }
2043}