1use crate::type_conversion::{CastOptions, decimal_to_variant_decimal};
19use arrow::array::{
20 Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
21 GenericStringArray, OffsetSizeTrait, PrimitiveArray,
22};
23use arrow::compute::kernels::cast;
24use arrow::datatypes::{
25 ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType, Date32Type,
26 Date64Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
27 RunEndIndexType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
28 Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
29 TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
30};
31use arrow::temporal_conversions::{as_date, as_datetime, as_time};
32use arrow_schema::{ArrowError, DataType, TimeUnit};
33use chrono::{DateTime, TimeZone, Utc};
34use parquet_variant::{
35 ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
36 VariantDecimal16,
37};
38use std::collections::HashMap;
39use std::ops::Range;
40
41pub(crate) enum ArrowToVariantRowBuilder<'a> {
47 Null(NullArrowToVariantBuilder),
48 Boolean(BooleanArrowToVariantBuilder<'a>),
49 PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, Int8Type>),
50 PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, Int16Type>),
51 PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, Int32Type>),
52 PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, Int64Type>),
53 PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, UInt8Type>),
54 PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, UInt16Type>),
55 PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, UInt32Type>),
56 PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, UInt64Type>),
57 PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, Float16Type>),
58 PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, Float32Type>),
59 PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, Float64Type>),
60 Decimal32(Decimal32ArrowToVariantBuilder<'a>),
61 Decimal64(Decimal64ArrowToVariantBuilder<'a>),
62 Decimal128(Decimal128ArrowToVariantBuilder<'a>),
63 Decimal256(Decimal256ArrowToVariantBuilder<'a>),
64 TimestampSecond(TimestampArrowToVariantBuilder<'a, TimestampSecondType>),
65 TimestampMillisecond(TimestampArrowToVariantBuilder<'a, TimestampMillisecondType>),
66 TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, TimestampMicrosecondType>),
67 TimestampNanosecond(TimestampArrowToVariantBuilder<'a, TimestampNanosecondType>),
68 Date32(DateArrowToVariantBuilder<'a, Date32Type>),
69 Date64(DateArrowToVariantBuilder<'a, Date64Type>),
70 Time32Second(TimeArrowToVariantBuilder<'a, Time32SecondType>),
71 Time32Millisecond(TimeArrowToVariantBuilder<'a, Time32MillisecondType>),
72 Time64Microsecond(TimeArrowToVariantBuilder<'a, Time64MicrosecondType>),
73 Time64Nanosecond(TimeArrowToVariantBuilder<'a, Time64NanosecondType>),
74 Binary(BinaryArrowToVariantBuilder<'a, i32>),
75 LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
76 BinaryView(BinaryViewArrowToVariantBuilder<'a>),
77 FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
78 Utf8(StringArrowToVariantBuilder<'a, i32>),
79 LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
80 Utf8View(StringViewArrowToVariantBuilder<'a>),
81 List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
82 LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
83 ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
84 LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
85 FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
86 Struct(StructArrowToVariantBuilder<'a>),
87 Map(MapArrowToVariantBuilder<'a>),
88 Union(UnionArrowToVariantBuilder<'a>),
89 Dictionary(DictionaryArrowToVariantBuilder<'a>),
90 RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, Int16Type>),
91 RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, Int32Type>),
92 RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, Int64Type>),
93}
94
95impl<'a> ArrowToVariantRowBuilder<'a> {
96 pub fn append_row(
98 &mut self,
99 builder: &mut impl VariantBuilderExt,
100 index: usize,
101 ) -> Result<(), ArrowError> {
102 use ArrowToVariantRowBuilder::*;
103 match self {
104 Null(b) => b.append_row(builder, index),
105 Boolean(b) => b.append_row(builder, index),
106 PrimitiveInt8(b) => b.append_row(builder, index),
107 PrimitiveInt16(b) => b.append_row(builder, index),
108 PrimitiveInt32(b) => b.append_row(builder, index),
109 PrimitiveInt64(b) => b.append_row(builder, index),
110 PrimitiveUInt8(b) => b.append_row(builder, index),
111 PrimitiveUInt16(b) => b.append_row(builder, index),
112 PrimitiveUInt32(b) => b.append_row(builder, index),
113 PrimitiveUInt64(b) => b.append_row(builder, index),
114 PrimitiveFloat16(b) => b.append_row(builder, index),
115 PrimitiveFloat32(b) => b.append_row(builder, index),
116 PrimitiveFloat64(b) => b.append_row(builder, index),
117 Decimal32(b) => b.append_row(builder, index),
118 Decimal64(b) => b.append_row(builder, index),
119 Decimal128(b) => b.append_row(builder, index),
120 Decimal256(b) => b.append_row(builder, index),
121 TimestampSecond(b) => b.append_row(builder, index),
122 TimestampMillisecond(b) => b.append_row(builder, index),
123 TimestampMicrosecond(b) => b.append_row(builder, index),
124 TimestampNanosecond(b) => b.append_row(builder, index),
125 Date32(b) => b.append_row(builder, index),
126 Date64(b) => b.append_row(builder, index),
127 Time32Second(b) => b.append_row(builder, index),
128 Time32Millisecond(b) => b.append_row(builder, index),
129 Time64Microsecond(b) => b.append_row(builder, index),
130 Time64Nanosecond(b) => b.append_row(builder, index),
131 Binary(b) => b.append_row(builder, index),
132 LargeBinary(b) => b.append_row(builder, index),
133 BinaryView(b) => b.append_row(builder, index),
134 FixedSizeBinary(b) => b.append_row(builder, index),
135 Utf8(b) => b.append_row(builder, index),
136 LargeUtf8(b) => b.append_row(builder, index),
137 Utf8View(b) => b.append_row(builder, index),
138 List(b) => b.append_row(builder, index),
139 LargeList(b) => b.append_row(builder, index),
140 ListView(b) => b.append_row(builder, index),
141 LargeListView(b) => b.append_row(builder, index),
142 FixedSizeList(b) => b.append_row(builder, index),
143 Struct(b) => b.append_row(builder, index),
144 Map(b) => b.append_row(builder, index),
145 Union(b) => b.append_row(builder, index),
146 Dictionary(b) => b.append_row(builder, index),
147 RunEndEncodedInt16(b) => b.append_row(builder, index),
148 RunEndEncodedInt32(b) => b.append_row(builder, index),
149 RunEndEncodedInt64(b) => b.append_row(builder, index),
150 }
151 }
152}
153
154pub(crate) fn make_arrow_to_variant_row_builder<'a>(
156 data_type: &'a DataType,
157 array: &'a dyn Array,
158 options: &'a CastOptions,
159) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
160 use ArrowToVariantRowBuilder::*;
161 let builder =
162 match data_type {
163 DataType::Null => Null(NullArrowToVariantBuilder),
164 DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
165 DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
166 DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
167 DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
168 DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
169 DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
170 DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
171 DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
172 DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
173 DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
174 DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
175 DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
176 DataType::Decimal32(_, scale) => {
177 Decimal32(Decimal32ArrowToVariantBuilder::new(array, options, *scale))
178 }
179 DataType::Decimal64(_, scale) => {
180 Decimal64(Decimal64ArrowToVariantBuilder::new(array, options, *scale))
181 }
182 DataType::Decimal128(_, scale) => {
183 Decimal128(Decimal128ArrowToVariantBuilder::new(array, options, *scale))
184 }
185 DataType::Decimal256(_, scale) => {
186 Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
187 }
188 DataType::Timestamp(time_unit, time_zone) => {
189 match time_unit {
190 TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
191 array,
192 options,
193 time_zone.is_some(),
194 )),
195 TimeUnit::Millisecond => TimestampMillisecond(
196 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
197 ),
198 TimeUnit::Microsecond => TimestampMicrosecond(
199 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
200 ),
201 TimeUnit::Nanosecond => TimestampNanosecond(
202 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
203 ),
204 }
205 }
206 DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
207 DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
208 DataType::Time32(time_unit) => match time_unit {
209 TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
210 TimeUnit::Millisecond => {
211 Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
212 }
213 _ => {
214 return Err(ArrowError::CastError(format!(
215 "Unsupported Time32 unit: {time_unit:?}"
216 )));
217 }
218 },
219 DataType::Time64(time_unit) => match time_unit {
220 TimeUnit::Microsecond => {
221 Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
222 }
223 TimeUnit::Nanosecond => {
224 Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
225 }
226 _ => {
227 return Err(ArrowError::CastError(format!(
228 "Unsupported Time64 unit: {time_unit:?}"
229 )));
230 }
231 },
232 DataType::Duration(_) | DataType::Interval(_) => {
233 return Err(ArrowError::InvalidArgumentError(
234 "Casting duration/interval types to Variant is not supported. \
235 The Variant format does not define duration/interval types."
236 .to_string(),
237 ));
238 }
239 DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
240 DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
241 DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
242 DataType::FixedSizeBinary(_) => {
243 FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
244 }
245 DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
246 DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
247 DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
248 DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
249 DataType::LargeList(_) => {
250 LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
251 }
252 DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
253 array.as_list_view(),
254 options,
255 )?),
256 DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
257 array.as_list_view(),
258 options,
259 )?),
260 DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
261 array.as_fixed_size_list(),
262 options,
263 )?),
264 DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
265 array.as_struct(),
266 options,
267 )?),
268 DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
269 DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
270 DataType::Dictionary(_, _) => {
271 Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
272 }
273 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
274 DataType::Int16 => {
275 RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
276 }
277 DataType::Int32 => {
278 RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
279 }
280 DataType::Int64 => {
281 RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
282 }
283 _ => {
284 return Err(ArrowError::CastError(format!(
285 "Unsupported run ends type: {}",
286 run_ends.data_type()
287 )));
288 }
289 },
290 };
291 Ok(builder)
292}
293
// Generates a row builder struct for an Arrow array type whose rows can be turned
// into variant values one at a time.
//
// Accepted syntax:
//
//     define_row_builder!(
//         struct Name<'a [, T: Bound]> [where Path: Bound] [{ field: Type, ... }],
//         |array| -> ConcreteArrayType { downcast_expression }
//         [, |value| [-> Option<_>] transform_expression]
//     );
//
// Generated items:
//
// * `struct Name` holding `array: &'a ConcreteArrayType` plus the extra fields.
// * `Name::new(array: &'a dyn Array, field...)`, which stores the result of the
//   downcast expression.
// * `Name::append_row(&self, builder, index)`, which appends a null for a null row
//   and otherwise appends `array.value(index)`, optionally passed through the
//   transform first.
//
// Inside the transform, every extra field is available under its own name as a
// reference to the field. When the transform is marked `-> Option<...>`, a `None`
// result is a conversion failure: the generated code reads `self.options.strict`,
// so such builders must declare an `options` field; strict mode returns
// `ArrowError::ComputeError`, otherwise `Variant::Null` is appended.
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $($field:ident: $field_type:ty),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr)?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $bound )?>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic)?>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new(
                $array_param: &$lifetime dyn Array
                $(, $( $field: $field_type ),+ )?
            ) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                }
            }

            fn append_row(
                &self,
                builder: &mut impl VariantBuilderExt,
                index: usize,
            ) -> Result<(), ArrowError> {
                // Null rows never reach the value transform.
                if self.array.is_null(index) {
                    builder.append_null();
                    return Ok(());
                }

                // Expose the extra fields to the transform under their own names.
                // The attribute sits outside the repetition, so it covers only the
                // first binding.
                $(
                    #[allow(unused)]
                    $( let $field = &self.$field; )+
                )?

                let value = self.array.value(index);
                $(
                    let $value = value;
                    let value = $value_transform;
                    $(
                        let Some(value): Option<$option_ty> = value else {
                            if !self.options.strict {
                                // Non-strict mode records the failed conversion as
                                // an explicit variant null value.
                                builder.append_value(Variant::Null);
                                return Ok(());
                            }
                            return Err(ArrowError::ComputeError(format!(
                                "Failed to convert value at index {index}: conversion failed",
                            )));
                        };
                    )?
                )?
                builder.append_value(value);
                Ok(())
            }
        }
    };
}
391
392define_row_builder!(
393 struct BooleanArrowToVariantBuilder<'a>,
394 |array| -> arrow::array::BooleanArray { array.as_boolean() }
395);
396
397define_row_builder!(
398 struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
399 where T::Native: Into<Variant<'a, 'a>>,
400 |array| -> PrimitiveArray<T> { array.as_primitive() }
401);
402
403define_row_builder!(
404 struct Decimal32ArrowToVariantBuilder<'a> {
405 options: &'a CastOptions,
406 scale: i8,
407 },
408 |array| -> arrow::array::Decimal32Array { array.as_primitive() },
409 |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i32, VariantDecimal4) }
410);
411
412define_row_builder!(
413 struct Decimal64ArrowToVariantBuilder<'a> {
414 options: &'a CastOptions,
415 scale: i8,
416 },
417 |array| -> arrow::array::Decimal64Array { array.as_primitive() },
418 |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i64, VariantDecimal8) }
419);
420
421define_row_builder!(
422 struct Decimal128ArrowToVariantBuilder<'a> {
423 options: &'a CastOptions,
424 scale: i8,
425 },
426 |array| -> arrow::array::Decimal128Array { array.as_primitive() },
427 |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i128, VariantDecimal16) }
428);
429
430define_row_builder!(
431 struct Decimal256ArrowToVariantBuilder<'a> {
432 options: &'a CastOptions,
433 scale: i8,
434 },
435 |array| -> arrow::array::Decimal256Array { array.as_primitive() },
436 |value| -> Option<_> {
437 value.to_i128().and_then(|i128_val| {
439 decimal_to_variant_decimal!(i128_val, scale, i128, VariantDecimal16)
440 })
441 }
442);
443
444define_row_builder!(
445 struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
446 options: &'a CastOptions,
447 has_time_zone: bool,
448 },
449 |array| -> PrimitiveArray<T> { array.as_primitive() },
450 |value| -> Option<_> {
451 as_datetime::<T>(value).map(|naive_datetime| {
453 if *has_time_zone {
454 let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
456 Variant::from(utc_dt) } else {
458 Variant::from(naive_datetime) }
461 })
462 }
463);
464
465define_row_builder!(
466 struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
467 where
468 i64: From<T::Native>,
469 {
470 options: &'a CastOptions,
471 },
472 |array| -> PrimitiveArray<T> { array.as_primitive() },
473 |value| -> Option<_> {
474 let date_value = i64::from(value);
475 as_date::<T>(date_value)
476 }
477);
478
479define_row_builder!(
480 struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
481 where
482 i64: From<T::Native>,
483 {
484 options: &'a CastOptions,
485 },
486 |array| -> PrimitiveArray<T> { array.as_primitive() },
487 |value| -> Option<_> {
488 let time_value = i64::from(value);
489 as_time::<T>(time_value)
490 }
491);
492
493define_row_builder!(
494 struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
495 |array| -> GenericBinaryArray<O> { array.as_binary() }
496);
497
498define_row_builder!(
499 struct BinaryViewArrowToVariantBuilder<'a>,
500 |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
501);
502
503define_row_builder!(
504 struct FixedSizeBinaryArrowToVariantBuilder<'a>,
505 |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
506);
507
508define_row_builder!(
509 struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
510 |array| -> GenericStringArray<O> { array.as_string() }
511);
512
513define_row_builder!(
514 struct StringViewArrowToVariantBuilder<'a>,
515 |array| -> arrow::array::StringViewArray { array.as_string_view() }
516);
517
518pub(crate) struct NullArrowToVariantBuilder;
520
521impl NullArrowToVariantBuilder {
522 fn append_row(
523 &mut self,
524 builder: &mut impl VariantBuilderExt,
525 _index: usize,
526 ) -> Result<(), ArrowError> {
527 builder.append_null();
528 Ok(())
529 }
530}
531
532pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
535 list_array: &'a L,
536 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
537}
538
539impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
540 pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
541 let values = array.values();
542 let values_builder =
543 make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
544
545 Ok(Self {
546 list_array: array,
547 values_builder: Box::new(values_builder),
548 })
549 }
550
551 fn append_row(
552 &mut self,
553 builder: &mut impl VariantBuilderExt,
554 index: usize,
555 ) -> Result<(), ArrowError> {
556 if self.list_array.is_null(index) {
557 builder.append_null();
558 return Ok(());
559 }
560
561 let range = self.list_array.element_range(index);
562
563 let mut list_builder = builder.try_new_list()?;
564 for value_index in range {
565 self.values_builder
566 .append_row(&mut list_builder, value_index)?;
567 }
568 list_builder.finish();
569 Ok(())
570 }
571}
572
573pub(crate) trait ListLikeArray: Array {
575 fn values(&self) -> &dyn Array;
577
578 fn element_range(&self, index: usize) -> Range<usize>;
580}
581
582impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
583 fn values(&self) -> &dyn Array {
584 self.values()
585 }
586
587 fn element_range(&self, index: usize) -> Range<usize> {
588 let offsets = self.offsets();
589 let start = offsets[index].as_usize();
590 let end = offsets[index + 1].as_usize();
591 start..end
592 }
593}
594
595impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
596 fn values(&self) -> &dyn Array {
597 self.values()
598 }
599
600 fn element_range(&self, index: usize) -> Range<usize> {
601 let offsets = self.value_offsets();
602 let sizes = self.value_sizes();
603 let offset = offsets[index].as_usize();
604 let size = sizes[index].as_usize();
605 offset..(offset + size)
606 }
607}
608
609impl ListLikeArray for FixedSizeListArray {
610 fn values(&self) -> &dyn Array {
611 self.values()
612 }
613
614 fn element_range(&self, index: usize) -> Range<usize> {
615 let value_length = self.value_length().as_usize();
616 let offset = index * value_length;
617 offset..(offset + value_length)
618 }
619}
620
621pub(crate) struct StructArrowToVariantBuilder<'a> {
623 struct_array: &'a arrow::array::StructArray,
624 field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
625}
626
627impl<'a> StructArrowToVariantBuilder<'a> {
628 pub(crate) fn new(
629 struct_array: &'a arrow::array::StructArray,
630 options: &'a CastOptions,
631 ) -> Result<Self, ArrowError> {
632 let mut field_builders = Vec::new();
633
634 for (field_name, field_array) in struct_array
636 .column_names()
637 .iter()
638 .zip(struct_array.columns().iter())
639 {
640 let field_builder = make_arrow_to_variant_row_builder(
641 field_array.data_type(),
642 field_array.as_ref(),
643 options,
644 )?;
645 field_builders.push((*field_name, field_builder));
646 }
647
648 Ok(Self {
649 struct_array,
650 field_builders,
651 })
652 }
653
654 fn append_row(
655 &mut self,
656 builder: &mut impl VariantBuilderExt,
657 index: usize,
658 ) -> Result<(), ArrowError> {
659 if self.struct_array.is_null(index) {
660 builder.append_null();
661 } else {
662 let mut obj_builder = builder.try_new_object()?;
664
665 for (field_name, row_builder) in &mut self.field_builders {
667 let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
668 row_builder.append_row(&mut field_builder, index)?;
669 }
670
671 obj_builder.finish();
672 }
673 Ok(())
674 }
675}
676
677pub(crate) struct MapArrowToVariantBuilder<'a> {
679 map_array: &'a arrow::array::MapArray,
680 key_strings: arrow::array::StringArray,
681 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
682}
683
684impl<'a> MapArrowToVariantBuilder<'a> {
685 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
686 let map_array = array.as_map();
687
688 let keys = cast(map_array.keys(), &DataType::Utf8)?;
690 let key_strings = keys.as_string::<i32>().clone();
691
692 let values = map_array.values();
694 let values_builder =
695 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
696
697 Ok(Self {
698 map_array,
699 key_strings,
700 values_builder: Box::new(values_builder),
701 })
702 }
703
704 fn append_row(
705 &mut self,
706 builder: &mut impl VariantBuilderExt,
707 index: usize,
708 ) -> Result<(), ArrowError> {
709 if self.map_array.is_null(index) {
711 builder.append_null();
712 return Ok(());
713 }
714
715 let offsets = self.map_array.offsets();
716 let start = offsets[index].as_usize();
717 let end = offsets[index + 1].as_usize();
718
719 let mut object_builder = builder.try_new_object()?;
721
722 for kv_index in start..end {
724 let key = self.key_strings.value(kv_index);
725 let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
726 self.values_builder
727 .append_row(&mut field_builder, kv_index)?;
728 }
729
730 object_builder.finish();
731 Ok(())
732 }
733}
734
735pub(crate) struct UnionArrowToVariantBuilder<'a> {
739 union_array: &'a arrow::array::UnionArray,
740 child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
741}
742
743impl<'a> UnionArrowToVariantBuilder<'a> {
744 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
745 let union_array = array.as_union();
746 let type_ids = union_array.type_ids();
747
748 let mut child_builders = HashMap::new();
750 for &type_id in type_ids {
751 let child_array = union_array.child(type_id);
752 let child_builder = make_arrow_to_variant_row_builder(
753 child_array.data_type(),
754 child_array.as_ref(),
755 options,
756 )?;
757 child_builders.insert(type_id, Box::new(child_builder));
758 }
759
760 Ok(Self {
761 union_array,
762 child_builders,
763 })
764 }
765
766 fn append_row(
767 &mut self,
768 builder: &mut impl VariantBuilderExt,
769 index: usize,
770 ) -> Result<(), ArrowError> {
771 let type_id = self.union_array.type_id(index);
772 let value_offset = self.union_array.value_offset(index);
773
774 match self.child_builders.get_mut(&type_id) {
776 Some(child_builder) => child_builder.append_row(builder, value_offset)?,
777 None => builder.append_null(),
778 }
779
780 Ok(())
781 }
782}
783
784pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
786 keys: &'a dyn Array, normalized_keys: Vec<usize>,
788 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
789}
790
791impl<'a> DictionaryArrowToVariantBuilder<'a> {
792 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
793 let dict_array = array.as_any_dictionary();
794 let values = dict_array.values();
795 let values_builder =
796 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
797
798 let normalized_keys = match values.len() {
800 0 => Vec::new(),
801 _ => dict_array.normalized_keys(),
802 };
803
804 Ok(Self {
805 keys: dict_array.keys(),
806 normalized_keys,
807 values_builder: Box::new(values_builder),
808 })
809 }
810
811 fn append_row(
812 &mut self,
813 builder: &mut impl VariantBuilderExt,
814 index: usize,
815 ) -> Result<(), ArrowError> {
816 if self.keys.is_null(index) {
817 builder.append_null();
818 } else {
819 let normalized_key = self.normalized_keys[index];
820 self.values_builder.append_row(builder, normalized_key)?;
821 }
822 Ok(())
823 }
824}
825
826pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
828 run_array: &'a arrow::array::RunArray<R>,
829 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
830
831 run_ends: &'a [R::Native],
832 run_number: usize, run_start: usize, }
835
836impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
837 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
838 let Some(run_array) = array.as_run_opt() else {
839 return Err(ArrowError::CastError("Expected RunArray".to_string()));
840 };
841
842 let values = run_array.values();
843 let values_builder =
844 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
845
846 Ok(Self {
847 run_array,
848 values_builder: Box::new(values_builder),
849 run_ends: run_array.run_ends().values(),
850 run_number: 0,
851 run_start: 0,
852 })
853 }
854
855 fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
856 if index >= self.run_start {
857 let Some(run_end) = self.run_ends.get(self.run_number) else {
858 return Err(ArrowError::CastError(format!(
859 "Index {index} beyond run array"
860 )));
861 };
862 if index < run_end.as_usize() {
863 return Ok(());
864 }
865 if index == run_end.as_usize() {
866 self.run_number += 1;
867 self.run_start = run_end.as_usize();
868 return Ok(());
869 }
870 }
871
872 let run_number = self
874 .run_ends
875 .partition_point(|&run_end| run_end.as_usize() <= index);
876 if run_number >= self.run_ends.len() {
877 return Err(ArrowError::CastError(format!(
878 "Index {index} beyond run array"
879 )));
880 }
881 self.run_number = run_number;
882 self.run_start = match run_number {
883 0 => 0,
884 _ => self.run_ends[run_number - 1].as_usize(),
885 };
886 Ok(())
887 }
888
889 fn append_row(
890 &mut self,
891 builder: &mut impl VariantBuilderExt,
892 index: usize,
893 ) -> Result<(), ArrowError> {
894 self.set_run_for_index(index)?;
895
896 if self.run_array.values().is_null(self.run_number) {
898 builder.append_null();
899 return Ok(());
900 }
901
902 self.values_builder.append_row(builder, self.run_number)?;
904
905 Ok(())
906 }
907}
908
909#[cfg(test)]
910mod tests {
911 use super::*;
912 use crate::{VariantArray, VariantArrayBuilder};
913 use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
914 use std::sync::Arc;
915
916 fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
918 execute_row_builder_test_with_options(array, CastOptions::default())
919 }
920
921 fn execute_row_builder_test_with_options(
923 array: &dyn Array,
924 options: CastOptions,
925 ) -> VariantArray {
926 let mut row_builder =
927 make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
928
929 let mut array_builder = VariantArrayBuilder::new(array.len());
930
931 for i in 0..array.len() {
933 row_builder.append_row(&mut array_builder, i).unwrap();
934 }
935
936 let variant_array = array_builder.build();
937 assert_eq!(variant_array.len(), array.len());
938 variant_array
939 }
940
941 fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
944 test_row_builder_basic_with_options(array, expected_values, CastOptions::default());
945 }
946
947 fn test_row_builder_basic_with_options(
949 array: &dyn Array,
950 expected_values: Vec<Option<Variant>>,
951 options: CastOptions,
952 ) {
953 let variant_array = execute_row_builder_test_with_options(array, options);
954
955 for (i, expected) in expected_values.iter().enumerate() {
957 match expected {
958 Some(variant) => {
959 assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
960 }
961 None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
962 }
963 }
964 }
965
966 #[test]
967 fn test_primitive_row_builder() {
968 let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
969 test_row_builder_basic(
970 &int_array,
971 vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
972 );
973 }
974
975 #[test]
976 fn test_string_row_builder() {
977 let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
978 test_row_builder_basic(
979 &string_array,
980 vec![
981 Some(Variant::from("hello")),
982 None,
983 Some(Variant::from("world")),
984 ],
985 );
986 }
987
988 #[test]
989 fn test_boolean_row_builder() {
990 let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
991 test_row_builder_basic(
992 &bool_array,
993 vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
994 );
995 }
996
997 #[test]
998 fn test_struct_row_builder() {
999 use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
1000 use arrow_schema::{DataType, Field};
1001 use std::sync::Arc;
1002
1003 let int_field = Field::new("id", DataType::Int32, true);
1005 let string_field = Field::new("name", DataType::Utf8, true);
1006
1007 let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1008 let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
1009
1010 let struct_array = StructArray::try_new(
1011 vec![int_field, string_field].into(),
1012 vec![
1013 Arc::new(int_array) as ArrayRef,
1014 Arc::new(string_array) as ArrayRef,
1015 ],
1016 None,
1017 )
1018 .unwrap();
1019
1020 let variant_array = execute_row_builder_test(&struct_array);
1021
1022 let first_variant = variant_array.value(0);
1024 assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1025 assert_eq!(
1026 first_variant.get_object_field("name"),
1027 Some(Variant::from("Alice"))
1028 );
1029
1030 let second_variant = variant_array.value(1);
1032 assert_eq!(second_variant.get_object_field("id"), None); assert_eq!(
1034 second_variant.get_object_field("name"),
1035 Some(Variant::from("Bob"))
1036 );
1037
1038 let third_variant = variant_array.value(2);
1040 assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
1041 assert_eq!(third_variant.get_object_field("name"), None); }
1043
1044 #[test]
1045 fn test_run_end_encoded_row_builder() {
1046 use arrow::array::{Int32Array, RunArray};
1047 use arrow::datatypes::Int32Type;
1048
1049 let values = StringArray::from(vec!["A", "B", "C"]);
1053 let run_ends = Int32Array::from(vec![2, 5, 6]);
1054 let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1055
1056 let variant_array = execute_row_builder_test(&run_array);
1057
1058 assert_eq!(variant_array.value(0), Variant::from("A")); assert_eq!(variant_array.value(1), Variant::from("A")); assert_eq!(variant_array.value(2), Variant::from("B")); assert_eq!(variant_array.value(3), Variant::from("B")); assert_eq!(variant_array.value(4), Variant::from("B")); assert_eq!(variant_array.value(5), Variant::from("C")); }
1066
/// Rows of a run-end encoded array can be appended in arbitrary, non-sequential
/// order through one and the same row builder.
#[test]
fn test_run_end_encoded_random_access() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    let run_values = StringArray::from(vec!["A", "B", "C"]);
    let run_ends = Int32Array::from(vec![2, 5, 6]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &run_values).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &cast_options)
            .unwrap();

    // Deliberately jump back and forth between the runs.
    let lookups = [0, 5, 2, 4, 1, 3];
    let expected = ["A", "C", "B", "B", "A", "B"];

    for (&row, &value) in lookups.iter().zip(expected.iter()) {
        // A fresh single-row output builder per lookup keeps each check independent.
        let mut single_row = VariantArrayBuilder::new(1);
        row_builder.append_row(&mut single_row, row).unwrap();
        let built = single_row.build();
        assert_eq!(built.value(0), Variant::from(value));
    }
}
1092
/// A null entry in the values of a run-end encoded array must make every row of
/// the corresponding run null in the output.
#[test]
fn test_run_end_encoded_with_nulls() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    // Logical rows: A A <null> <null> B
    let run_values = StringArray::from(vec![Some("A"), None, Some("B")]);
    let run_ends = Int32Array::from(vec![2, 4, 5]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &run_values).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &cast_options)
            .unwrap();

    let mut output = VariantArrayBuilder::new(5);
    for row in 0..5 {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();
    assert_eq!(variant_array.len(), 5);

    let expected_rows = [Some("A"), Some("A"), None, None, Some("B")];
    for (row, expected) in expected_rows.iter().enumerate() {
        match expected {
            Some(text) => assert_eq!(variant_array.value(row), Variant::from(*text)),
            None => assert!(variant_array.is_null(row)),
        }
    }
}
1123
/// Each row of a dictionary array must resolve to the dictionary value its key
/// points at.
#[test]
fn test_dictionary_row_builder() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let dictionary = StringArray::from(vec!["apple", "banana", "cherry"]);
    let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
    let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();

    let variant_array = execute_row_builder_test(&dict_array);

    // Keys [0, 1, 0, 2, 1] resolved against the dictionary above.
    let expected_rows = ["apple", "banana", "apple", "cherry", "banana"];
    for (row, &expected) in expected_rows.iter().enumerate() {
        assert_eq!(variant_array.value(row), Variant::from(expected));
    }
}
1143
/// Null keys in a dictionary array must come out as null rows, while non-null
/// keys still resolve to their dictionary value.
#[test]
fn test_dictionary_with_nulls() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let dictionary = StringArray::from(vec!["x", "y", "z"]);
    let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
    let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &cast_options)
            .unwrap();

    let mut output = VariantArrayBuilder::new(5);
    for row in 0..5 {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();
    assert_eq!(variant_array.len(), 5);

    let expected_rows = [Some("x"), None, Some("y"), None, Some("z")];
    for (row, expected) in expected_rows.iter().enumerate() {
        match expected {
            Some(text) => assert_eq!(variant_array.value(row), Variant::from(*text)),
            None => assert!(variant_array.is_null(row)),
        }
    }
}
1175
/// Rows of a dictionary array can be appended out of order through a single
/// row builder.
#[test]
fn test_dictionary_random_access() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let dictionary = StringArray::from(vec!["red", "green", "blue"]);
    let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
    let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &cast_options)
            .unwrap();

    // Visit the rows in a shuffled order.
    let lookups = [5, 0, 3, 1, 4, 2];
    let expected = ["blue", "red", "red", "green", "green", "blue"];

    for (&row, &value) in lookups.iter().zip(expected.iter()) {
        let mut single_row = VariantArrayBuilder::new(1);
        row_builder.append_row(&mut single_row, row).unwrap();
        let built = single_row.build();
        assert_eq!(built.value(0), Variant::from(value));
    }
}
1202
/// A dictionary whose values are structs: every row must resolve to the struct
/// its key points at, exposed as an object with `id` and `name` fields.
#[test]
fn test_nested_dictionary() {
    use arrow::array::{DictionaryArray, Int32Array, StructArray};
    use arrow::datatypes::{Field, Int32Type};

    let ids = Int32Array::from(vec![1, 2, 3]);
    let names = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
    let dictionary = StructArray::from(vec![
        (
            Arc::new(Field::new("id", DataType::Int32, false)),
            Arc::new(ids) as ArrayRef,
        ),
        (
            Arc::new(Field::new("name", DataType::Utf8, false)),
            Arc::new(names) as ArrayRef,
        ),
    ]);

    let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
    let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &cast_options)
            .unwrap();

    let mut output = VariantArrayBuilder::new(5);
    for row in 0..5 {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();
    assert_eq!(variant_array.len(), 5);

    // (row, expected id, expected name) for the first three rows (keys 0, 1, 0).
    let expected_rows: [(usize, i32, &str); 3] = [(0, 1, "Alice"), (1, 2, "Bob"), (2, 1, "Alice")];
    for &(row, id, name) in expected_rows.iter() {
        let object = variant_array.value(row);
        assert_eq!(object.get_object_field("id"), Some(Variant::from(id)));
        assert_eq!(object.get_object_field("name"), Some(Variant::from(name)));
    }
}
1266
/// Converts a list array containing populated, null and empty lists and checks
/// each resulting row.
#[test]
fn test_list_row_builder() {
    use arrow::array::ListArray;

    let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        None,
        Some(vec![]),
    ]);

    let variant_array = execute_row_builder_test(&list_array);

    // Row 2 is the null list.
    assert!(variant_array.is_null(2));

    // Remaining rows, described as (row index, expected elements).
    let expected_rows: Vec<(usize, Vec<i32>)> =
        vec![(0, vec![1, 2]), (1, vec![3, 4, 5]), (3, vec![])];
    for (row, items) in expected_rows.iter() {
        let variant = variant_array.value(*row);
        let list = variant.as_list().unwrap();
        assert_eq!(list.len(), items.len());
        for (position, &item) in items.iter().enumerate() {
            assert_eq!(list.get(position), Some(Variant::from(item)));
        }
    }
}
1305
/// A sliced list array must be read relative to the slice: row 0 of the slice
/// is the second list of the underlying array.
#[test]
fn test_sliced_list_row_builder() {
    use arrow::array::ListArray;

    let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        Some(vec![Some(6)]),
    ]);

    // Keep only the middle list, [3, 4, 5].
    let sliced_array = list_array.slice(1, 1);

    let cast_options = CastOptions::default();
    let mut row_builder = make_arrow_to_variant_row_builder(
        sliced_array.data_type(),
        &sliced_array,
        &cast_options,
    )
    .unwrap();

    let mut output = VariantArrayBuilder::new(sliced_array.len());
    row_builder.append_row(&mut output, 0).unwrap();
    let variant_array = output.build();

    assert_eq!(variant_array.len(), 1);

    let only_row = variant_array.value(0);
    let list = only_row.as_list().unwrap();
    let expected_items = [3, 4, 5];
    assert_eq!(list.len(), 3);
    for (position, &item) in expected_items.iter().enumerate() {
        assert_eq!(list.get(position), Some(Variant::from(item)));
    }
}
1344
/// A list of lists: the first outer row holds two inner lists, the second
/// outer row is null.
#[test]
fn test_nested_list_row_builder() {
    use arrow::array::ListArray;
    use arrow::datatypes::Field;

    let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
    let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));

    // Inner lists: [1, 2] and [3].
    let inner_lists = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2)]),
        Some(vec![Some(3)]),
    ]);

    // Offsets [0, 2, 2]: outer row 0 spans both inner lists, outer row 1 spans none
    // and is additionally marked null by the validity buffer.
    let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
    let outer_validity = Some(arrow::buffer::NullBuffer::from(vec![true, false]));
    let outer_list = ListArray::new(
        inner_list_field,
        outer_offsets,
        Arc::new(inner_lists),
        outer_validity,
    );

    let cast_options = CastOptions::default();
    let mut row_builder =
        make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &cast_options)
            .unwrap();

    let mut output = VariantArrayBuilder::new(outer_list.len());
    for row in 0..outer_list.len() {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();

    assert_eq!(variant_array.len(), 2);

    let expected_inner: Vec<Vec<i32>> = vec![vec![1, 2], vec![3]];
    let first_row = variant_array.value(0);
    let outer = first_row.as_list().unwrap();
    assert_eq!(outer.len(), expected_inner.len());
    for (position, expected_items) in expected_inner.iter().enumerate() {
        let element = outer.get(position).unwrap();
        let inner = element.as_list().unwrap();
        assert_eq!(inner.len(), expected_items.len());
        for (inner_position, &item) in expected_items.iter().enumerate() {
            assert_eq!(inner.get(inner_position), Some(Variant::from(item)));
        }
    }

    assert!(variant_array.is_null(1));
}
1401
/// Map rows are expected to become objects keyed by the map keys; the test
/// covers a one-entry map, an empty map, a null map and a two-entry map.
#[test]
fn test_map_row_builder() {
    use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
    use arrow::buffer::{NullBuffer, OffsetBuffer};
    use arrow::datatypes::{DataType, Field, Fields};
    use std::sync::Arc;

    let map_keys = StringArray::from(vec!["key1", "key2", "key3"]);
    let map_values = Int32Array::from(vec![1, 2, 3]);
    let entries_fields = Fields::from(vec![
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Int32, true),
    ]);
    let entries = StructArray::new(
        entries_fields.clone(),
        vec![Arc::new(map_keys), Arc::new(map_values)],
        None,
    );

    // Offsets [0, 1, 1, 1, 3] give the entry ranges per map row:
    // row 0 -> [0, 1), row 1 -> empty, row 2 -> empty (and null), row 3 -> [1, 3).
    let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
    let validity = Some(NullBuffer::from(vec![true, true, false, true]));

    let map_field = Arc::new(Field::new(
        "entries",
        DataType::Struct(entries_fields),
        false,
    ));

    let map_array = MapArray::try_new(map_field, offsets, entries, validity, false).unwrap();

    let variant_array = execute_row_builder_test(&map_array);

    // Row 0: a single entry.
    let first = variant_array.value(0);
    let first_object = first.as_object().unwrap();
    assert_eq!(first_object.len(), 1);
    assert_eq!(first_object.get("key1"), Some(Variant::from(1)));

    // Row 1: an empty, non-null map.
    let second = variant_array.value(1);
    let second_object = second.as_object().unwrap();
    assert_eq!(second_object.len(), 0);

    // Row 2: null map.
    assert!(variant_array.is_null(2));

    // Row 3: the remaining two entries.
    let fourth = variant_array.value(3);
    let fourth_object = fourth.as_object().unwrap();
    assert_eq!(fourth_object.len(), 2);
    let expected_entries: [(&str, i32); 2] = [("key2", 2), ("key3", 3)];
    for &(key, value) in expected_entries.iter() {
        assert_eq!(fourth_object.get(key), Some(Variant::from(value)));
    }
}
1472
/// Sparse union (no offsets buffer): every row takes its value from the child
/// selected by its type id, at the same row index.
#[test]
fn test_union_sparse_row_builder() {
    use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    // All children have the full length of the union (6 rows).
    let ints = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
    let floats = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
    let strings = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
    let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();

    let union_fields = UnionFields::new(
        vec![0, 1, 2],
        vec![
            Field::new("int_field", DataType::Int32, false),
            Field::new("float_field", DataType::Float64, false),
            Field::new("string_field", DataType::Utf8, false),
        ],
    );

    let children: Vec<Arc<dyn Array>> =
        vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

    // `None` offsets make this a sparse union.
    let union_array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();

    let variant_array = execute_row_builder_test(&union_array);

    // The last row selects the int child, whose slot 5 is null.
    let expected_rows = vec![
        Some(Variant::Int32(1)),
        Some(Variant::Double(3.2)),
        Some(Variant::from("hello")),
        Some(Variant::Double(32.5)),
        Some(Variant::Int32(34)),
        None,
    ];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(value) => assert_eq!(variant_array.value(row), value),
            None => assert!(variant_array.is_null(row)),
        }
    }
}
1517
/// Dense union (with offsets buffer): every row takes its value from the child
/// selected by its type id, at the position given by the row's offset.
#[test]
fn test_union_dense_row_builder() {
    use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    // Children only hold the values actually referenced by the union rows.
    let ints = Int32Array::from(vec![Some(1), Some(34), None]);
    let floats = Float64Array::from(vec![3.2, 32.5]);
    let strings = StringArray::from(vec!["hello"]);
    let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
    let offsets = [0, 0, 0, 1, 1, 2]
        .into_iter()
        .collect::<ScalarBuffer<i32>>();

    let union_fields = UnionFields::new(
        vec![0, 1, 2],
        vec![
            Field::new("int_field", DataType::Int32, false),
            Field::new("float_field", DataType::Float64, false),
            Field::new("string_field", DataType::Utf8, false),
        ],
    );

    let children: Vec<Arc<dyn Array>> =
        vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

    // `Some(offsets)` makes this a dense union.
    let union_array =
        UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder = make_arrow_to_variant_row_builder(
        union_array.data_type(),
        &union_array,
        &cast_options,
    )
    .unwrap();

    let mut output = VariantArrayBuilder::new(union_array.len());
    for row in 0..union_array.len() {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();

    assert_eq!(variant_array.len(), 6);

    // The last row points at int child slot 2, which is null.
    let expected_rows = vec![
        Some(Variant::Int32(1)),
        Some(Variant::Double(3.2)),
        Some(Variant::from("hello")),
        Some(Variant::Double(32.5)),
        Some(Variant::Int32(34)),
        None,
    ];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        match expected {
            Some(value) => assert_eq!(variant_array.value(row), value),
            None => assert!(variant_array.is_null(row)),
        }
    }
}
1577
/// Sparse union whose type ids (1 and 3) are not contiguous and do not start at
/// zero; rows must still be routed to the matching child.
#[test]
fn test_union_sparse_type_ids_row_builder() {
    use arrow::array::{Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    let ints = Int32Array::from(vec![Some(42), None]);
    let strings = StringArray::from(vec![None, Some("test")]);
    let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();

    // Type id 1 -> int_field, type id 3 -> string_field.
    let union_fields = UnionFields::new(
        vec![1, 3],
        vec![
            Field::new("int_field", DataType::Int32, false),
            Field::new("string_field", DataType::Utf8, false),
        ],
    );

    let children: Vec<Arc<dyn Array>> = vec![Arc::new(ints), Arc::new(strings)];

    let union_array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();

    let cast_options = CastOptions::default();
    let mut row_builder = make_arrow_to_variant_row_builder(
        union_array.data_type(),
        &union_array,
        &cast_options,
    )
    .unwrap();

    let mut output = VariantArrayBuilder::new(union_array.len());
    for row in 0..union_array.len() {
        row_builder.append_row(&mut output, row).unwrap();
    }
    let variant_array = output.build();

    assert_eq!(variant_array.len(), 2);

    let first_row = variant_array.value(0);
    let second_row = variant_array.value(1);
    assert_eq!(first_row, Variant::Int32(42));
    assert_eq!(second_row, Variant::from("test"));
}
1629
/// Decimal32 values with precision 9 and scale 2 are expected as 4-byte variant
/// decimals carrying the same unscaled value and scale.
#[test]
fn test_decimal32_row_builder() {
    use arrow::array::Decimal32Array;
    use parquet_variant::VariantDecimal4;

    let input = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
        .with_precision_and_scale(9, 2)
        .unwrap();

    let expected = vec![
        Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
        None,
        Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
    ];

    test_row_builder_basic(&input, expected);
}
1649
/// Decimal128 with a negative scale (-2): the expected variants carry the value
/// multiplied by 100 at scale 0 (123 -> 12300, 456 -> 45600).
#[test]
fn test_decimal128_row_builder() {
    use arrow::array::Decimal128Array;
    use parquet_variant::VariantDecimal16;

    let input = Decimal128Array::from(vec![Some(123), None, Some(456)])
        .with_precision_and_scale(10, -2)
        .unwrap();

    let expected = vec![
        Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
        None,
        Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
    ];

    test_row_builder_basic(&input, expected);
}
1669
/// Decimal256 value that does not fit into an i128: with `strict: false` the
/// expected output for that row is a variant null, while a small value in the
/// same array is expected as a 16-byte variant decimal.
#[test]
fn test_decimal256_overflow_row_builder() {
    use arrow::array::Decimal256Array;
    use arrow::datatypes::i256;

    // One past i128::MAX.
    let too_large = i256::from_i128(i128::MAX) + i256::from(1);
    let input = Decimal256Array::from(vec![Some(too_large), Some(i256::from(123))])
        .with_precision_and_scale(76, 3)
        .unwrap();

    let expected = vec![
        Some(Variant::Null),
        Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
    ];
    let non_strict = CastOptions { strict: false };

    test_row_builder_basic_with_options(&input, expected, non_strict);
}
1690
/// Binary values (including non-UTF-8 bytes and the empty slice) are expected
/// as binary variants with identical contents.
#[test]
fn test_binary_row_builder() {
    use arrow::array::BinaryArray;

    let input = BinaryArray::from(vec![
        Some(b"hello".as_slice()),
        None,
        Some(b"\x00\x01\x02\xFF".as_slice()),
        Some(b"".as_slice()),
    ]);

    let expected = vec![
        Some(Variant::from(b"hello".as_slice())),
        None,
        Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
        Some(Variant::from([].as_slice())),
    ];

    test_row_builder_basic(&input, expected);
}
1713
/// BinaryView values, one short and one noticeably longer, are expected as
/// binary variants with identical contents.
#[test]
fn test_binary_view_row_builder() {
    use arrow::array::BinaryViewArray;

    let long_value = b"this is a longer binary view that exceeds inline storage";

    let input = BinaryViewArray::from(vec![
        Some(b"short".as_slice()),
        None,
        Some(long_value.as_slice()),
    ]);

    let expected = vec![
        Some(Variant::from(b"short".as_slice())),
        None,
        Some(Variant::from(long_value.as_slice())),
    ];

    test_row_builder_basic(&input, expected);
}
1736
/// Fixed-size (4 byte) binary values are expected as binary variants with
/// identical contents.
#[test]
fn test_fixed_size_binary_row_builder() {
    use arrow::array::FixedSizeBinaryArray;

    let rows = vec![
        Some([0x01, 0x02, 0x03, 0x04]),
        None,
        Some([0xFF, 0xFE, 0xFD, 0xFC]),
    ];
    let input =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(rows.into_iter(), 4).unwrap();

    let expected = vec![
        Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
        None,
        Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
    ];

    test_row_builder_basic(&input, expected);
}
1759
/// Utf8View values, one short and one noticeably longer, are expected as
/// string variants with identical contents.
#[test]
fn test_utf8_view_row_builder() {
    use arrow::array::StringViewArray;

    let long_text = "this is a much longer string that will be stored out-of-line in the buffer";

    let input = StringViewArray::from(vec![Some("short"), None, Some(long_text)]);

    let expected = vec![
        Some(Variant::from("short")),
        None,
        Some(Variant::from(long_text)),
    ];

    test_row_builder_basic(&input, expected);
}
1782
/// Second-resolution timestamps without a timezone are expected as naive
/// (timezone-less) date-time variants; the null slot is expected as `None`.
#[test]
fn test_timestamp_second_row_builder() {
    use arrow::array::TimestampSecondArray;

    // Seconds since the Unix epoch: 2021-01-01T00:00:00 and 2022-01-01T00:00:00.
    let timestamp_data = vec![Some(1609459200), None, Some(1640995200)];
    let timestamp_array = TimestampSecondArray::from(timestamp_data);

    let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
    let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();

    // The array is passed by reference, like in the other row-builder tests.
    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_naive1)),
            None,
            Some(Variant::from(expected_naive2)),
        ],
    );
}
1806
/// Microsecond timestamps tagged with the "UTC" timezone are expected as
/// timezone-aware (`DateTime<Utc>`) variants; the null slot is expected as `None`.
#[test]
fn test_timestamp_with_timezone_row_builder() {
    use arrow::array::TimestampMicrosecondArray;
    use chrono::DateTime;

    // Microseconds since the Unix epoch: 2021-01-01T00:00:00 and 2022-01-01T00:00:00.
    let timestamp_data = vec![Some(1609459200000000), None, Some(1640995200000000)];
    let timezone = "UTC".to_string();
    let timestamp_array =
        TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);

    // Unlike the timezone-less tests, the expectations are not converted to naive values.
    let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
    let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_utc1)),
            None,
            Some(Variant::from(expected_utc2)),
        ],
    );
}
1833
/// Nanosecond timestamps without a timezone are expected as naive date-time
/// variants that keep the full nanosecond fraction.
#[test]
fn test_timestamp_nanosecond_precision_row_builder() {
    use arrow::array::TimestampNanosecondArray;

    // Nanoseconds since the Unix epoch: one value with a non-zero sub-second part
    // (.123456789) and one exactly on the second.
    let timestamp_data = vec![
        Some(1609459200123456789),
        None,
        Some(1609459200000000000),
    ];
    let timestamp_array = TimestampNanosecondArray::from(timestamp_data);

    let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
        .unwrap()
        .naive_utc();
    let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_with_nanos)),
            None,
            Some(Variant::from(expected_no_nanos)),
        ],
    );
}
1859
/// Millisecond timestamps without a timezone are expected as naive date-time
/// variants that keep the millisecond fraction.
#[test]
fn test_timestamp_millisecond_row_builder() {
    use arrow::array::TimestampMillisecondArray;

    // Milliseconds since the Unix epoch: one value with .123 s and one exactly on the second.
    let timestamp_data = vec![Some(1609459200123), None, Some(1609459200000)];
    let timestamp_array = TimestampMillisecondArray::from(timestamp_data);

    // 123 ms expressed in nanoseconds for `DateTime::from_timestamp`.
    let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
        .unwrap()
        .naive_utc();
    let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_with_millis)),
            None,
            Some(Variant::from(expected_no_millis)),
        ],
    );
}
1885
/// Date32 values (days since 1970-01-01) are expected as date variants.
#[test]
fn test_date32_row_builder() {
    use arrow::array::Date32Array;
    use chrono::NaiveDate;

    // 0 -> 1970-01-01, 19723 -> 2024-01-01, -719162 -> 0001-01-01.
    let input = Date32Array::from(vec![Some(0), None, Some(19723), Some(-719162)]);

    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
    let new_year_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_one = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();

    let expected = vec![
        Some(Variant::from(epoch)),
        None,
        Some(Variant::from(new_year_2024)),
        Some(Variant::from(year_one)),
    ];

    test_row_builder_basic(&input, expected);
}
1913
/// Date64 values (milliseconds since 1970-01-01) are expected as date variants.
#[test]
fn test_date64_row_builder() {
    use arrow::array::Date64Array;
    use chrono::NaiveDate;

    // 0 -> 1970-01-01, 1704067200000 -> 2024-01-01, 86400000 (one day) -> 1970-01-02.
    let input = Date64Array::from(vec![
        Some(0),
        None,
        Some(1704067200000),
        Some(86400000),
    ]);

    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
    let new_year_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let day_after_epoch = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();

    let expected = vec![
        Some(Variant::from(epoch)),
        None,
        Some(Variant::from(new_year_2024)),
        Some(Variant::from(day_after_epoch)),
    ];

    test_row_builder_basic(&input, expected);
}
1942
/// Time32 values in seconds since midnight are expected as time variants.
#[test]
fn test_time32_second_row_builder() {
    use arrow::array::Time32SecondArray;
    use chrono::NaiveTime;

    // 0 -> 00:00:00, 3661 -> 01:01:01, 86399 -> 23:59:59.
    let input = Time32SecondArray::from(vec![Some(0), None, Some(3661), Some(86399)]);

    let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
    let one_one_one = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
    let end_of_day = NaiveTime::from_hms_opt(23, 59, 59).unwrap();

    let expected = vec![
        Some(Variant::from(midnight)),
        None,
        Some(Variant::from(one_one_one)),
        Some(Variant::from(end_of_day)),
    ];

    test_row_builder_basic(&input, expected);
}
1971
/// Time32 values in milliseconds since midnight are expected as time variants
/// that keep the millisecond fraction.
#[test]
fn test_time32_millisecond_row_builder() {
    use arrow::array::Time32MillisecondArray;
    use chrono::NaiveTime;

    // 0 -> 00:00:00.000, 3661123 -> 01:01:01.123, 86399999 -> 23:59:59.999.
    let input = Time32MillisecondArray::from(vec![
        Some(0),
        None,
        Some(3661123),
        Some(86399999),
    ]);

    let midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
    let one_one_one = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
    let end_of_day = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();

    let expected = vec![
        Some(Variant::from(midnight)),
        None,
        Some(Variant::from(one_one_one)),
        Some(Variant::from(end_of_day)),
    ];

    test_row_builder_basic(&input, expected);
}
2000
/// Time64 values in microseconds since midnight are expected as time variants
/// that keep the microsecond fraction.
#[test]
fn test_time64_microsecond_row_builder() {
    use arrow::array::Time64MicrosecondArray;
    use chrono::NaiveTime;

    // 0 -> 00:00:00, 3661123456 -> 01:01:01.123456, 86399999999 -> 23:59:59.999999.
    let input = Time64MicrosecondArray::from(vec![
        Some(0),
        None,
        Some(3661123456),
        Some(86399999999),
    ]);

    let midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
    let one_one_one = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
    let end_of_day = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();

    let expected = vec![
        Some(Variant::from(midnight)),
        None,
        Some(Variant::from(one_one_one)),
        Some(Variant::from(end_of_day)),
    ];

    test_row_builder_basic(&input, expected);
}
2029
/// Time64 values in nanoseconds since midnight are expected as time variants.
///
/// The non-zero expectations are built at microsecond granularity, i.e. the
/// last three nanosecond digits of the inputs do not appear in them.
/// NOTE(review): presumably the variant time type carries microsecond
/// precision only — confirm against the converter.
#[test]
fn test_time64_nanosecond_row_builder() {
    use arrow::array::Time64NanosecondArray;
    use chrono::NaiveTime;

    // 0 -> 00:00:00, 3661123456789 -> 01:01:01.123456789,
    // 86399999999999 -> 23:59:59.999999999.
    let input = Time64NanosecondArray::from(vec![
        Some(0),
        None,
        Some(3661123456789),
        Some(86399999999999),
    ]);

    let midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
    let one_one_one = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
    let end_of_day = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();

    let expected = vec![
        Some(Variant::from(midnight)),
        None,
        Some(Variant::from(one_one_one)),
        Some(Variant::from(end_of_day)),
    ];

    test_row_builder_basic(&input, expected);
}
2059}