1use arrow::array::{
19 Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
20 GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
21};
22use arrow::compute::{CastOptions, kernels::cast};
23use arrow::datatypes::{
24 self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
25 DecimalType, RunEndIndexType,
26};
27use arrow::temporal_conversions::{as_date, as_datetime, as_time};
28use arrow_schema::{ArrowError, DataType, TimeUnit};
29use chrono::{DateTime, TimeZone, Utc};
30use parquet_variant::{
31 ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
32 VariantDecimal16, VariantDecimalType,
33};
34use std::collections::HashMap;
35use std::ops::Range;
36
/// Row-oriented converter from an Arrow array to Variant values.
///
/// Each variant wraps the concrete builder for one Arrow data type. The
/// variant is selected by `make_arrow_to_variant_row_builder`, and rows are
/// emitted one at a time through `append_row`.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null and boolean arrays.
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    // Signed integers, unsigned integers and floats share one generic builder.
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
    // Decimals; each Arrow width is paired with a Variant decimal type.
    // Decimal256 has its own builder and no generic pairing.
    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Timestamps, one variant per time unit.
    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
    // Dates and times of day.
    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
    // Binary and string arrays (i32 offsets, i64 offsets, view and fixed-size layouts).
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // List-shaped arrays share one builder, generic over the array type.
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    // Other nested and encoded arrays.
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    // Run-end encoded arrays, one variant per run-end index type.
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
}
90
impl<'a> ArrowToVariantRowBuilder<'a> {
    /// Appends the value at row `index` of the wrapped array to `builder`.
    ///
    /// Dispatches to the `append_row` of whichever concrete builder this
    /// variant holds and returns its result unchanged.
    pub fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        index: usize,
    ) -> Result<(), ArrowError> {
        use ArrowToVariantRowBuilder::*;
        // Every arm binds a different concrete builder type, so the arms
        // cannot be merged into or-patterns.
        match self {
            Null(b) => b.append_row(builder, index),
            Boolean(b) => b.append_row(builder, index),
            PrimitiveInt8(b) => b.append_row(builder, index),
            PrimitiveInt16(b) => b.append_row(builder, index),
            PrimitiveInt32(b) => b.append_row(builder, index),
            PrimitiveInt64(b) => b.append_row(builder, index),
            PrimitiveUInt8(b) => b.append_row(builder, index),
            PrimitiveUInt16(b) => b.append_row(builder, index),
            PrimitiveUInt32(b) => b.append_row(builder, index),
            PrimitiveUInt64(b) => b.append_row(builder, index),
            PrimitiveFloat16(b) => b.append_row(builder, index),
            PrimitiveFloat32(b) => b.append_row(builder, index),
            PrimitiveFloat64(b) => b.append_row(builder, index),
            Decimal32(b) => b.append_row(builder, index),
            Decimal64(b) => b.append_row(builder, index),
            Decimal128(b) => b.append_row(builder, index),
            Decimal256(b) => b.append_row(builder, index),
            TimestampSecond(b) => b.append_row(builder, index),
            TimestampMillisecond(b) => b.append_row(builder, index),
            TimestampMicrosecond(b) => b.append_row(builder, index),
            TimestampNanosecond(b) => b.append_row(builder, index),
            Date32(b) => b.append_row(builder, index),
            Date64(b) => b.append_row(builder, index),
            Time32Second(b) => b.append_row(builder, index),
            Time32Millisecond(b) => b.append_row(builder, index),
            Time64Microsecond(b) => b.append_row(builder, index),
            Time64Nanosecond(b) => b.append_row(builder, index),
            Binary(b) => b.append_row(builder, index),
            LargeBinary(b) => b.append_row(builder, index),
            BinaryView(b) => b.append_row(builder, index),
            FixedSizeBinary(b) => b.append_row(builder, index),
            Utf8(b) => b.append_row(builder, index),
            LargeUtf8(b) => b.append_row(builder, index),
            Utf8View(b) => b.append_row(builder, index),
            List(b) => b.append_row(builder, index),
            LargeList(b) => b.append_row(builder, index),
            ListView(b) => b.append_row(builder, index),
            LargeListView(b) => b.append_row(builder, index),
            FixedSizeList(b) => b.append_row(builder, index),
            Struct(b) => b.append_row(builder, index),
            Map(b) => b.append_row(builder, index),
            Union(b) => b.append_row(builder, index),
            Dictionary(b) => b.append_row(builder, index),
            RunEndEncodedInt16(b) => b.append_row(builder, index),
            RunEndEncodedInt32(b) => b.append_row(builder, index),
            RunEndEncodedInt64(b) => b.append_row(builder, index),
        }
    }
}
149
150pub(crate) fn make_arrow_to_variant_row_builder<'a>(
152 data_type: &'a DataType,
153 array: &'a dyn Array,
154 options: &'a CastOptions,
155) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
156 use ArrowToVariantRowBuilder::*;
157 let builder =
158 match data_type {
159 DataType::Null => Null(NullArrowToVariantBuilder),
160 DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
161 DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
162 DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
163 DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
164 DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
165 DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
166 DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
167 DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
168 DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
169 DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
170 DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
171 DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
172 DataType::Decimal32(_, scale) => {
173 Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
174 }
175 DataType::Decimal64(_, scale) => {
176 Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
177 }
178 DataType::Decimal128(_, scale) => {
179 Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
180 }
181 DataType::Decimal256(_, scale) => {
182 Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
183 }
184 DataType::Timestamp(time_unit, time_zone) => {
185 match time_unit {
186 TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
187 array,
188 options,
189 time_zone.is_some(),
190 )),
191 TimeUnit::Millisecond => TimestampMillisecond(
192 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
193 ),
194 TimeUnit::Microsecond => TimestampMicrosecond(
195 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
196 ),
197 TimeUnit::Nanosecond => TimestampNanosecond(
198 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
199 ),
200 }
201 }
202 DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
203 DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
204 DataType::Time32(time_unit) => match time_unit {
205 TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
206 TimeUnit::Millisecond => {
207 Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
208 }
209 _ => {
210 return Err(ArrowError::CastError(format!(
211 "Unsupported Time32 unit: {time_unit:?}"
212 )));
213 }
214 },
215 DataType::Time64(time_unit) => match time_unit {
216 TimeUnit::Microsecond => {
217 Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
218 }
219 TimeUnit::Nanosecond => {
220 Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
221 }
222 _ => {
223 return Err(ArrowError::CastError(format!(
224 "Unsupported Time64 unit: {time_unit:?}"
225 )));
226 }
227 },
228 DataType::Duration(_) | DataType::Interval(_) => {
229 return Err(ArrowError::InvalidArgumentError(
230 "Casting duration/interval types to Variant is not supported. \
231 The Variant format does not define duration/interval types."
232 .to_string(),
233 ));
234 }
235 DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
236 DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
237 DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
238 DataType::FixedSizeBinary(_) => {
239 FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
240 }
241 DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
242 DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
243 DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
244 DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
245 DataType::LargeList(_) => {
246 LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
247 }
248 DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
249 array.as_list_view(),
250 options,
251 )?),
252 DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
253 array.as_list_view(),
254 options,
255 )?),
256 DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
257 array.as_fixed_size_list(),
258 options,
259 )?),
260 DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
261 array.as_struct(),
262 options,
263 )?),
264 DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
265 DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
266 DataType::Dictionary(_, _) => {
267 Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
268 }
269 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
270 DataType::Int16 => {
271 RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
272 }
273 DataType::Int32 => {
274 RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
275 }
276 DataType::Int64 => {
277 RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
278 }
279 _ => {
280 return Err(ArrowError::CastError(format!(
281 "Unsupported run ends type: {}",
282 run_ends.data_type()
283 )));
284 }
285 },
286 };
287 Ok(builder)
288}
289
/// Generates a row-builder struct that wraps a typed Arrow array and appends
/// one row at a time to a Variant builder.
///
/// Macro input, in order:
///
/// * `struct Name<'a, Generics...>`, optionally followed by a single `where`
///   bound and a braced list of extra fields (`{ field: Type, ... }`);
/// * `|array| -> ArrayType { expr }`: how to obtain `&'a ArrayType` from the
///   `&'a dyn Array` passed to `new`;
/// * optionally `|value| expr` or `|value| -> Option<Ty> expr`: a transform
///   applied to each non-null value before it is appended.
///
/// The generated type has `new(array, fields...)` and
/// `append_row(&self, builder, index)`. A null row is appended as a null.
/// When the transform is declared with `-> Option<..>` and yields `None`, the
/// generated code reads `self.options.safe`, so such builders must declare an
/// `options` field: with `safe == false` it returns
/// [`ArrowError::ComputeError`], otherwise it appends `Variant::Null`.
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
            // Keeps the generic parameters "used" even when no field mentions them.
            _phantom: std::marker::PhantomData<($( $generic, )*)>,
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Bind each extra field to a local of the same name so the
                    // caller-supplied transform can refer to it (e.g. `*scale`).
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    let value = self.array.value(index);
                    // Optional transform: expose the raw value under the
                    // caller's chosen name, then shadow `value` with the result.
                    $(
                        let $value = value;
                        let value = $value_transform;
                        // Fallible transform: `None` is either an error or a
                        // Variant null, depending on `options.safe`.
                        $(
                            let Some(value): Option<$option_ty> = value else {
                                if !self.options.safe {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
389
// Row builder over `BooleanArray`; non-null values are appended without a transform.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
394
// Row builder over any `PrimitiveArray<T>` whose native value converts into a
// `Variant`; non-null values are appended without a transform.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
400
// Row builder for decimal arrays whose native type matches the Variant decimal
// type `V`. Each value is combined with the column's signed `scale`; a value
// that `V::try_new_with_signed_scale` rejects becomes `None`, which the macro
// turns into an error or a Variant null depending on `options.safe`.
define_row_builder!(
    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
    where
        V: VariantDecimalType<Native = A::Native>,
    {
        options: &'a CastOptions<'a>,
        scale: i8,
    },
    |array| -> PrimitiveArray<A> { array.as_primitive() },
    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
);
412
413define_row_builder!(
415 struct Decimal256ArrowToVariantBuilder<'a> {
416 options: &'a CastOptions<'a>,
417 scale: i8,
418 },
419 |array| -> arrow::array::Decimal256Array { array.as_primitive() },
420 |value| -> Option<_> {
421 let value = value.to_i128();
422 value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
423 }
424);
425
426define_row_builder!(
427 struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
428 options: &'a CastOptions<'a>,
429 has_time_zone: bool,
430 },
431 |array| -> PrimitiveArray<T> { array.as_primitive() },
432 |value| -> Option<_> {
433 as_datetime::<T>(value).map(|naive_datetime| {
435 if *has_time_zone {
436 let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
438 Variant::from(utc_dt) } else {
440 Variant::from(naive_datetime) }
443 })
444 }
445);
446
447define_row_builder!(
448 struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
449 where
450 i64: From<T::Native>,
451 {
452 options: &'a CastOptions<'a>,
453 },
454 |array| -> PrimitiveArray<T> { array.as_primitive() },
455 |value| -> Option<_> {
456 let date_value = i64::from(value);
457 as_date::<T>(date_value)
458 }
459);
460
461define_row_builder!(
462 struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
463 where
464 i64: From<T::Native>,
465 {
466 options: &'a CastOptions<'a>,
467 },
468 |array| -> PrimitiveArray<T> { array.as_primitive() },
469 |value| -> Option<_> {
470 let time_value = i64::from(value);
471 as_time::<T>(time_value)
472 }
473);
474
// Row builder over `GenericBinaryArray<O>` (used with both i32 and i64
// offsets); non-null values are appended without a transform.
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);
479
// Row builder over `BinaryViewArray`; non-null values are appended without a transform.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);
484
// Row builder over `FixedSizeBinaryArray`; non-null values are appended without a transform.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
489
// Row builder over `GenericStringArray<O>` (used with both i32 and i64
// offsets); non-null values are appended without a transform.
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);
494
// Row builder over `StringViewArray`; non-null values are appended without a transform.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
499
/// Row builder for `DataType::Null` arrays: every row is appended as a null.
pub(crate) struct NullArrowToVariantBuilder;

impl NullArrowToVariantBuilder {
    /// Appends a null to `builder`; the row index is ignored.
    ///
    /// Always returns `Ok(())`.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        _index: usize,
    ) -> Result<(), ArrowError> {
        builder.append_null();
        Ok(())
    }
}
513
514pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
517 list_array: &'a L,
518 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
519}
520
521impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
522 pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
523 let values = array.values();
524 let values_builder =
525 make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
526
527 Ok(Self {
528 list_array: array,
529 values_builder: Box::new(values_builder),
530 })
531 }
532
533 fn append_row(
534 &mut self,
535 builder: &mut impl VariantBuilderExt,
536 index: usize,
537 ) -> Result<(), ArrowError> {
538 if self.list_array.is_null(index) {
539 builder.append_null();
540 return Ok(());
541 }
542
543 let range = self.list_array.element_range(index);
544
545 let mut list_builder = builder.try_new_list()?;
546 for value_index in range {
547 self.values_builder
548 .append_row(&mut list_builder, value_index)?;
549 }
550 list_builder.finish();
551 Ok(())
552 }
553}
554
/// Common view over the list-shaped Arrow arrays handled by
/// `ListArrowToVariantBuilder`.
pub(crate) trait ListLikeArray: Array {
    /// Returns the child array holding the elements of every list.
    fn values(&self) -> &ArrayRef;

    /// Returns the range of positions in `values()` that make up the list at
    /// row `index`.
    fn element_range(&self, index: usize) -> Range<usize>;
}
563
564impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
565 fn values(&self) -> &ArrayRef {
566 self.values()
567 }
568
569 fn element_range(&self, index: usize) -> Range<usize> {
570 let offsets = self.offsets();
571 let start = offsets[index].as_usize();
572 let end = offsets[index + 1].as_usize();
573 start..end
574 }
575}
576
577impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
578 fn values(&self) -> &ArrayRef {
579 self.values()
580 }
581
582 fn element_range(&self, index: usize) -> Range<usize> {
583 let offsets = self.value_offsets();
584 let sizes = self.value_sizes();
585 let offset = offsets[index].as_usize();
586 let size = sizes[index].as_usize();
587 offset..(offset + size)
588 }
589}
590
591impl ListLikeArray for FixedSizeListArray {
592 fn values(&self) -> &ArrayRef {
593 self.values()
594 }
595
596 fn element_range(&self, index: usize) -> Range<usize> {
597 let value_length = self.value_length().as_usize();
598 let offset = index * value_length;
599 offset..(offset + value_length)
600 }
601}
602
603pub(crate) struct StructArrowToVariantBuilder<'a> {
605 struct_array: &'a arrow::array::StructArray,
606 field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
607}
608
609impl<'a> StructArrowToVariantBuilder<'a> {
610 pub(crate) fn new(
611 struct_array: &'a arrow::array::StructArray,
612 options: &'a CastOptions,
613 ) -> Result<Self, ArrowError> {
614 let mut field_builders = Vec::new();
615
616 for (field_name, field_array) in struct_array
618 .column_names()
619 .iter()
620 .zip(struct_array.columns().iter())
621 {
622 let field_builder = make_arrow_to_variant_row_builder(
623 field_array.data_type(),
624 field_array.as_ref(),
625 options,
626 )?;
627 field_builders.push((*field_name, field_builder));
628 }
629
630 Ok(Self {
631 struct_array,
632 field_builders,
633 })
634 }
635
636 fn append_row(
637 &mut self,
638 builder: &mut impl VariantBuilderExt,
639 index: usize,
640 ) -> Result<(), ArrowError> {
641 if self.struct_array.is_null(index) {
642 builder.append_null();
643 } else {
644 let mut obj_builder = builder.try_new_object()?;
646
647 for (field_name, row_builder) in &mut self.field_builders {
649 let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
650 row_builder.append_row(&mut field_builder, index)?;
651 }
652
653 obj_builder.finish();
654 }
655 Ok(())
656 }
657}
658
659pub(crate) struct MapArrowToVariantBuilder<'a> {
661 map_array: &'a arrow::array::MapArray,
662 key_strings: arrow::array::StringArray,
663 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
664}
665
666impl<'a> MapArrowToVariantBuilder<'a> {
667 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
668 let map_array = array.as_map();
669
670 let keys = cast(map_array.keys(), &DataType::Utf8)?;
672 let key_strings = keys.as_string::<i32>().clone();
673
674 let values = map_array.values();
676 let values_builder =
677 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
678
679 Ok(Self {
680 map_array,
681 key_strings,
682 values_builder: Box::new(values_builder),
683 })
684 }
685
686 fn append_row(
687 &mut self,
688 builder: &mut impl VariantBuilderExt,
689 index: usize,
690 ) -> Result<(), ArrowError> {
691 if self.map_array.is_null(index) {
693 builder.append_null();
694 return Ok(());
695 }
696
697 let offsets = self.map_array.offsets();
698 let start = offsets[index].as_usize();
699 let end = offsets[index + 1].as_usize();
700
701 let mut object_builder = builder.try_new_object()?;
703
704 for kv_index in start..end {
706 let key = self.key_strings.value(kv_index);
707 let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
708 self.values_builder
709 .append_row(&mut field_builder, kv_index)?;
710 }
711
712 object_builder.finish();
713 Ok(())
714 }
715}
716
717pub(crate) struct UnionArrowToVariantBuilder<'a> {
721 union_array: &'a arrow::array::UnionArray,
722 child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
723}
724
725impl<'a> UnionArrowToVariantBuilder<'a> {
726 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
727 let union_array = array.as_union();
728 let type_ids = union_array.type_ids();
729
730 let mut child_builders = HashMap::new();
732 for &type_id in type_ids {
733 let child_array = union_array.child(type_id);
734 let child_builder = make_arrow_to_variant_row_builder(
735 child_array.data_type(),
736 child_array.as_ref(),
737 options,
738 )?;
739 child_builders.insert(type_id, Box::new(child_builder));
740 }
741
742 Ok(Self {
743 union_array,
744 child_builders,
745 })
746 }
747
748 fn append_row(
749 &mut self,
750 builder: &mut impl VariantBuilderExt,
751 index: usize,
752 ) -> Result<(), ArrowError> {
753 let type_id = self.union_array.type_id(index);
754 let value_offset = self.union_array.value_offset(index);
755
756 match self.child_builders.get_mut(&type_id) {
758 Some(child_builder) => child_builder.append_row(builder, value_offset)?,
759 None => builder.append_null(),
760 }
761
762 Ok(())
763 }
764}
765
766pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
768 keys: &'a dyn Array, normalized_keys: Vec<usize>,
770 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
771}
772
773impl<'a> DictionaryArrowToVariantBuilder<'a> {
774 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
775 let dict_array = array.as_any_dictionary();
776 let values = dict_array.values();
777 let values_builder =
778 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
779
780 let normalized_keys = match values.len() {
782 0 => Vec::new(),
783 _ => dict_array.normalized_keys(),
784 };
785
786 Ok(Self {
787 keys: dict_array.keys(),
788 normalized_keys,
789 values_builder: Box::new(values_builder),
790 })
791 }
792
793 fn append_row(
794 &mut self,
795 builder: &mut impl VariantBuilderExt,
796 index: usize,
797 ) -> Result<(), ArrowError> {
798 if self.keys.is_null(index) {
799 builder.append_null();
800 } else {
801 let normalized_key = self.normalized_keys[index];
802 self.values_builder.append_row(builder, normalized_key)?;
803 }
804 Ok(())
805 }
806}
807
808pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
810 run_array: &'a arrow::array::RunArray<R>,
811 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
812
813 run_ends: &'a [R::Native],
814 run_number: usize, run_start: usize, }
817
818impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
819 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
820 let Some(run_array) = array.as_run_opt() else {
821 return Err(ArrowError::CastError("Expected RunArray".to_string()));
822 };
823
824 let values = run_array.values();
825 let values_builder =
826 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
827
828 Ok(Self {
829 run_array,
830 values_builder: Box::new(values_builder),
831 run_ends: run_array.run_ends().values(),
832 run_number: 0,
833 run_start: 0,
834 })
835 }
836
837 fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
838 if index >= self.run_start {
839 let Some(run_end) = self.run_ends.get(self.run_number) else {
840 return Err(ArrowError::CastError(format!(
841 "Index {index} beyond run array"
842 )));
843 };
844 if index < run_end.as_usize() {
845 return Ok(());
846 }
847 if index == run_end.as_usize() {
848 self.run_number += 1;
849 self.run_start = run_end.as_usize();
850 return Ok(());
851 }
852 }
853
854 let run_number = self
856 .run_ends
857 .partition_point(|&run_end| run_end.as_usize() <= index);
858 if run_number >= self.run_ends.len() {
859 return Err(ArrowError::CastError(format!(
860 "Index {index} beyond run array"
861 )));
862 }
863 self.run_number = run_number;
864 self.run_start = match run_number {
865 0 => 0,
866 _ => self.run_ends[run_number - 1].as_usize(),
867 };
868 Ok(())
869 }
870
871 fn append_row(
872 &mut self,
873 builder: &mut impl VariantBuilderExt,
874 index: usize,
875 ) -> Result<(), ArrowError> {
876 self.set_run_for_index(index)?;
877
878 if self.run_array.values().is_null(self.run_number) {
880 builder.append_null();
881 return Ok(());
882 }
883
884 self.values_builder.append_row(builder, self.run_number)?;
886
887 Ok(())
888 }
889}
890
891#[cfg(test)]
892mod tests {
893 use super::*;
894 use crate::{VariantArray, VariantArrayBuilder};
895 use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
896 use arrow::datatypes::Int32Type;
897 use std::sync::Arc;
898
899 fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
901 execute_row_builder_test_with_options(
902 array,
903 CastOptions {
904 safe: false,
905 ..Default::default()
906 },
907 )
908 }
909
910 fn execute_row_builder_test_with_options(
912 array: &dyn Array,
913 options: CastOptions,
914 ) -> VariantArray {
915 let mut row_builder =
916 make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
917
918 let mut array_builder = VariantArrayBuilder::new(array.len());
919
920 for i in 0..array.len() {
922 row_builder.append_row(&mut array_builder, i).unwrap();
923 }
924
925 let variant_array = array_builder.build();
926 assert_eq!(variant_array.len(), array.len());
927 variant_array
928 }
929
930 fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
933 test_row_builder_basic_with_options(
934 array,
935 expected_values,
936 CastOptions {
937 safe: false,
938 ..Default::default()
939 },
940 );
941 }
942
943 fn test_row_builder_basic_with_options(
945 array: &dyn Array,
946 expected_values: Vec<Option<Variant>>,
947 options: CastOptions,
948 ) {
949 let variant_array = execute_row_builder_test_with_options(array, options);
950
951 for (i, expected) in expected_values.iter().enumerate() {
953 match expected {
954 Some(variant) => {
955 assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
956 }
957 None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
958 }
959 }
960 }
961
/// An `Int32Array` with a null in the middle converts to Int32 variants and a null.
#[test]
fn test_primitive_row_builder() {
    let expected = vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))];
    let input = Int32Array::from(vec![Some(42), None, Some(100)]);
    test_row_builder_basic(&input, expected);
}
970
/// A `StringArray` with a null in the middle converts to string variants and a null.
#[test]
fn test_string_row_builder() {
    let expected = vec![
        Some(Variant::from("hello")),
        None,
        Some(Variant::from("world")),
    ];
    let input = StringArray::from(vec![Some("hello"), None, Some("world")]);
    test_row_builder_basic(&input, expected);
}
983
/// A `BooleanArray` with a null in the middle converts to boolean variants and a null.
#[test]
fn test_boolean_row_builder() {
    let expected = vec![Some(Variant::from(true)), None, Some(Variant::from(false))];
    let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
    test_row_builder_basic(&input, expected);
}
992
/// A two-column struct array converts to one Variant object per row; a null
/// column value is expected to be absent from that row's object.
#[test]
fn test_struct_row_builder() {
    use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    let fields = vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ];
    let columns = vec![
        Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as ArrayRef,
        Arc::new(StringArray::from(vec![Some("Alice"), Some("Bob"), None])) as ArrayRef,
    ];
    let struct_array = StructArray::try_new(fields.into(), columns, None).unwrap();

    let variant_array = execute_row_builder_test(&struct_array);

    // Row 0: both fields present.
    let row0 = variant_array.value(0);
    assert_eq!(row0.get_object_field("id"), Some(Variant::from(1)));
    assert_eq!(row0.get_object_field("name"), Some(Variant::from("Alice")));

    // Row 1: `id` is null in the input.
    let row1 = variant_array.value(1);
    assert_eq!(row1.get_object_field("id"), None);
    assert_eq!(row1.get_object_field("name"), Some(Variant::from("Bob")));

    // Row 2: `name` is null in the input.
    let row2 = variant_array.value(2);
    assert_eq!(row2.get_object_field("id"), Some(Variant::from(3)));
    assert_eq!(row2.get_object_field("name"), None);
}
1039
/// Sequential conversion of a run-end encoded array with runs
/// A (rows 0-1), B (rows 2-4) and C (row 5).
#[test]
fn test_run_end_encoded_row_builder() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    let values = StringArray::from(vec!["A", "B", "C"]);
    let run_ends = Int32Array::from(vec![2, 5, 6]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let variant_array = execute_row_builder_test(&run_array);

    let expected_by_row = ["A", "A", "B", "B", "B", "C"];
    for (row, expected) in expected_by_row.iter().enumerate() {
        assert_eq!(variant_array.value(row), Variant::from(*expected));
    }
}
1062
/// Out-of-order row access on a run-end encoded array resolves each row to
/// the correct run, using a single reused row builder.
#[test]
fn test_run_end_encoded_random_access() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    let values = StringArray::from(vec!["A", "B", "C"]);
    let run_ends = Int32Array::from(vec![2, 5, 6]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut row_builder =
        make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();

    // Jumps forwards and backwards across run boundaries.
    let access_pattern = [0, 5, 2, 4, 1, 3];
    let expected_values = ["A", "C", "B", "B", "A", "B"];

    for (&index, &expected) in access_pattern.iter().zip(expected_values.iter()) {
        let mut array_builder = VariantArrayBuilder::new(1);
        row_builder.append_row(&mut array_builder, index).unwrap();
        let single_row = array_builder.build();
        assert_eq!(single_row.value(0), Variant::from(expected));
    }
}
1091
1092 #[test]
1093 fn test_run_end_encoded_with_nulls() {
1094 use arrow::array::{Int32Array, RunArray};
1095 use arrow::datatypes::Int32Type;
1096
1097 let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1099 let run_ends = Int32Array::from(vec![2, 4, 5]);
1100 let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1101
1102 let options = CastOptions {
1103 safe: false,
1104 ..Default::default()
1105 };
1106 let mut row_builder =
1107 make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1108 let mut array_builder = VariantArrayBuilder::new(5);
1109
1110 for i in 0..5 {
1112 row_builder.append_row(&mut array_builder, i).unwrap();
1113 }
1114
1115 let variant_array = array_builder.build();
1116 assert_eq!(variant_array.len(), 5);
1117
1118 assert_eq!(variant_array.value(0), Variant::from("A")); assert_eq!(variant_array.value(1), Variant::from("A")); assert!(variant_array.is_null(2)); assert!(variant_array.is_null(3)); assert_eq!(variant_array.value(4), Variant::from("B")); }
1125
1126 #[test]
1127 fn test_dictionary_row_builder() {
1128 use arrow::array::{DictionaryArray, Int32Array};
1129 use arrow::datatypes::Int32Type;
1130
1131 let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1133 let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1134 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1135
1136 let variant_array = execute_row_builder_test(&dict_array);
1137
1138 assert_eq!(variant_array.value(0), Variant::from("apple")); assert_eq!(variant_array.value(1), Variant::from("banana")); assert_eq!(variant_array.value(2), Variant::from("apple")); assert_eq!(variant_array.value(3), Variant::from("cherry")); assert_eq!(variant_array.value(4), Variant::from("banana")); }
1145
1146 #[test]
1147 fn test_dictionary_with_nulls() {
1148 use arrow::array::{DictionaryArray, Int32Array};
1149 use arrow::datatypes::Int32Type;
1150
1151 let values = StringArray::from(vec!["x", "y", "z"]);
1153 let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1154 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1155
1156 let options = CastOptions {
1157 safe: false,
1158 ..Default::default()
1159 };
1160 let mut row_builder =
1161 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1162 .unwrap();
1163 let mut array_builder = VariantArrayBuilder::new(5);
1164
1165 for i in 0..5 {
1167 row_builder.append_row(&mut array_builder, i).unwrap();
1168 }
1169
1170 let variant_array = array_builder.build();
1171 assert_eq!(variant_array.len(), 5);
1172
1173 assert_eq!(variant_array.value(0), Variant::from("x")); assert!(variant_array.is_null(1)); assert_eq!(variant_array.value(2), Variant::from("y")); assert!(variant_array.is_null(3)); assert_eq!(variant_array.value(4), Variant::from("z")); }
1180
1181 #[test]
1182 fn test_dictionary_random_access() {
1183 use arrow::array::{DictionaryArray, Int32Array};
1184 use arrow::datatypes::Int32Type;
1185
1186 let values = StringArray::from(vec!["red", "green", "blue"]);
1188 let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1189 let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1190
1191 let options = CastOptions {
1192 safe: false,
1193 ..Default::default()
1194 };
1195 let mut row_builder =
1196 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1197 .unwrap();
1198
1199 let access_pattern = [5, 0, 3, 1, 4, 2]; let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1202
1203 for (i, &index) in access_pattern.iter().enumerate() {
1204 let mut array_builder = VariantArrayBuilder::new(1);
1205 row_builder.append_row(&mut array_builder, index).unwrap();
1206 let variant_array = array_builder.build();
1207 assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1208 }
1209 }
1210
1211 #[test]
1212 fn test_nested_dictionary() {
1213 use arrow::array::{DictionaryArray, Int32Array, StructArray};
1214 use arrow::datatypes::{Field, Int32Type};
1215
1216 let id_array = Int32Array::from(vec![1, 2, 3]);
1218 let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1219 let struct_array = StructArray::from(vec![
1220 (
1221 Arc::new(Field::new("id", DataType::Int32, false)),
1222 Arc::new(id_array) as ArrayRef,
1223 ),
1224 (
1225 Arc::new(Field::new("name", DataType::Utf8, false)),
1226 Arc::new(name_array) as ArrayRef,
1227 ),
1228 ]);
1229
1230 let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1231 let dict_array =
1232 DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1233
1234 let options = CastOptions {
1235 safe: false,
1236 ..Default::default()
1237 };
1238 let mut row_builder =
1239 make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1240 .unwrap();
1241 let mut array_builder = VariantArrayBuilder::new(5);
1242
1243 for i in 0..5 {
1245 row_builder.append_row(&mut array_builder, i).unwrap();
1246 }
1247
1248 let variant_array = array_builder.build();
1249 assert_eq!(variant_array.len(), 5);
1250
1251 let first_variant = variant_array.value(0);
1253 assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1254 assert_eq!(
1255 first_variant.get_object_field("name"),
1256 Some(Variant::from("Alice"))
1257 );
1258
1259 let second_variant = variant_array.value(1);
1260 assert_eq!(
1261 second_variant.get_object_field("id"),
1262 Some(Variant::from(2))
1263 );
1264 assert_eq!(
1265 second_variant.get_object_field("name"),
1266 Some(Variant::from("Bob"))
1267 );
1268
1269 let third_variant = variant_array.value(2);
1271 assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1272 assert_eq!(
1273 third_variant.get_object_field("name"),
1274 Some(Variant::from("Alice"))
1275 );
1276 }
1277
1278 #[test]
1279 fn test_list_row_builder() {
1280 use arrow::array::ListArray;
1281
1282 let data = vec![
1284 Some(vec![Some(1), Some(2)]),
1285 Some(vec![Some(3), Some(4), Some(5)]),
1286 None,
1287 Some(vec![]),
1288 ];
1289 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1290
1291 let variant_array = execute_row_builder_test(&list_array);
1292
1293 let row0 = variant_array.value(0);
1295 let list0 = row0.as_list().unwrap();
1296 assert_eq!(list0.len(), 2);
1297 assert_eq!(list0.get(0), Some(Variant::from(1)));
1298 assert_eq!(list0.get(1), Some(Variant::from(2)));
1299
1300 let row1 = variant_array.value(1);
1302 let list1 = row1.as_list().unwrap();
1303 assert_eq!(list1.len(), 3);
1304 assert_eq!(list1.get(0), Some(Variant::from(3)));
1305 assert_eq!(list1.get(1), Some(Variant::from(4)));
1306 assert_eq!(list1.get(2), Some(Variant::from(5)));
1307
1308 assert!(variant_array.is_null(2));
1310
1311 let row3 = variant_array.value(3);
1313 let list3 = row3.as_list().unwrap();
1314 assert_eq!(list3.len(), 0);
1315 }
1316
1317 #[test]
1318 fn test_sliced_list_row_builder() {
1319 use arrow::array::ListArray;
1320
1321 let data = vec![
1323 Some(vec![Some(1), Some(2)]),
1324 Some(vec![Some(3), Some(4), Some(5)]),
1325 Some(vec![Some(6)]),
1326 ];
1327 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1328
1329 let sliced_array = list_array.slice(1, 1);
1331
1332 let options = CastOptions {
1333 safe: false,
1334 ..Default::default()
1335 };
1336 let mut row_builder =
1337 make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1338 .unwrap();
1339 let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1340
1341 row_builder
1343 .append_row(&mut variant_array_builder, 0)
1344 .unwrap();
1345 let variant_array = variant_array_builder.build();
1346
1347 assert_eq!(variant_array.len(), 1);
1349
1350 let row0 = variant_array.value(0);
1352 let list0 = row0.as_list().unwrap();
1353 assert_eq!(list0.len(), 3);
1354 assert_eq!(list0.get(0), Some(Variant::from(3)));
1355 assert_eq!(list0.get(1), Some(Variant::from(4)));
1356 assert_eq!(list0.get(2), Some(Variant::from(5)));
1357 }
1358
1359 #[test]
1360 fn test_nested_list_row_builder() {
1361 use arrow::array::ListArray;
1362 use arrow::datatypes::Field;
1363
1364 let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1366 let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1367
1368 let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1369 let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1370
1371 let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1372 let outer_list = ListArray::new(
1373 inner_list_field,
1374 outer_offsets,
1375 Arc::new(values_list),
1376 Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1377 );
1378
1379 let options = CastOptions {
1380 safe: false,
1381 ..Default::default()
1382 };
1383 let mut row_builder =
1384 make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1385 .unwrap();
1386 let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1387
1388 for i in 0..outer_list.len() {
1389 row_builder
1390 .append_row(&mut variant_array_builder, i)
1391 .unwrap();
1392 }
1393
1394 let variant_array = variant_array_builder.build();
1395
1396 assert_eq!(variant_array.len(), 2);
1398
1399 let row0 = variant_array.value(0);
1401 let outer_list0 = row0.as_list().unwrap();
1402 assert_eq!(outer_list0.len(), 2);
1403
1404 let inner_list0_0 = outer_list0.get(0).unwrap();
1405 let inner_list0_0 = inner_list0_0.as_list().unwrap();
1406 assert_eq!(inner_list0_0.len(), 2);
1407 assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1408 assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1409
1410 let inner_list0_1 = outer_list0.get(1).unwrap();
1411 let inner_list0_1 = inner_list0_1.as_list().unwrap();
1412 assert_eq!(inner_list0_1.len(), 1);
1413 assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1414
1415 assert!(variant_array.is_null(1));
1417 }
1418
1419 #[test]
1420 fn test_map_row_builder() {
1421 use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1422 use arrow::buffer::{NullBuffer, OffsetBuffer};
1423 use arrow::datatypes::{DataType, Field, Fields};
1424 use std::sync::Arc;
1425
1426 let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1428 let values = Int32Array::from(vec![1, 2, 3]);
1429 let entries_fields = Fields::from(vec![
1430 Field::new("key", DataType::Utf8, false),
1431 Field::new("value", DataType::Int32, true),
1432 ]);
1433 let entries = StructArray::new(
1434 entries_fields.clone(),
1435 vec![Arc::new(keys), Arc::new(values)],
1436 None, );
1438
1439 let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1445
1446 let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1448
1449 let map_field = Arc::new(Field::new(
1451 "entries",
1452 DataType::Struct(entries_fields),
1453 false, ));
1455
1456 let map_array = MapArray::try_new(
1458 map_field,
1459 offsets,
1460 entries,
1461 null_buffer,
1462 false, )
1464 .unwrap();
1465
1466 let variant_array = execute_row_builder_test(&map_array);
1467
1468 let map0 = variant_array.value(0);
1470 let obj0 = map0.as_object().unwrap();
1471 assert_eq!(obj0.len(), 1);
1472 assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1473
1474 let map1 = variant_array.value(1);
1476 let obj1 = map1.as_object().unwrap();
1477 assert_eq!(obj1.len(), 0); assert!(variant_array.is_null(2));
1481
1482 let map3 = variant_array.value(3);
1484 let obj3 = map3.as_object().unwrap();
1485 assert_eq!(obj3.len(), 2);
1486 assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1487 assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1488 }
1489
1490 #[test]
1491 fn test_union_sparse_row_builder() {
1492 use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1493 use arrow::buffer::ScalarBuffer;
1494 use arrow::datatypes::{DataType, Field, UnionFields};
1495 use std::sync::Arc;
1496
1497 let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1499 let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1500 let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1501 let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1502
1503 let union_fields = UnionFields::from_fields(vec![
1504 Field::new("int_field", DataType::Int32, false),
1505 Field::new("float_field", DataType::Float64, false),
1506 Field::new("string_field", DataType::Utf8, false),
1507 ]);
1508
1509 let children: Vec<Arc<dyn Array>> = vec![
1510 Arc::new(int_array),
1511 Arc::new(float_array),
1512 Arc::new(string_array),
1513 ];
1514
1515 let union_array = UnionArray::try_new(
1516 union_fields,
1517 type_ids,
1518 None, children,
1520 )
1521 .unwrap();
1522
1523 let variant_array = execute_row_builder_test(&union_array);
1524 assert_eq!(variant_array.value(0), Variant::Int32(1));
1525 assert_eq!(variant_array.value(1), Variant::Double(3.2));
1526 assert_eq!(variant_array.value(2), Variant::from("hello"));
1527 assert_eq!(variant_array.value(3), Variant::Double(32.5));
1528 assert_eq!(variant_array.value(4), Variant::Int32(34));
1529 assert!(variant_array.is_null(5));
1530 }
1531
1532 #[test]
1533 fn test_union_dense_row_builder() {
1534 use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1535 use arrow::buffer::ScalarBuffer;
1536 use arrow::datatypes::{DataType, Field, UnionFields};
1537 use std::sync::Arc;
1538
1539 let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1541 let float_array = Float64Array::from(vec![3.2, 32.5]);
1542 let string_array = StringArray::from(vec!["hello"]);
1543 let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1544 let offsets = [0, 0, 0, 1, 1, 2]
1545 .into_iter()
1546 .collect::<ScalarBuffer<i32>>();
1547
1548 let union_fields = UnionFields::from_fields(vec![
1549 Field::new("int_field", DataType::Int32, false),
1550 Field::new("float_field", DataType::Float64, false),
1551 Field::new("string_field", DataType::Utf8, false),
1552 ]);
1553
1554 let children: Vec<Arc<dyn Array>> = vec![
1555 Arc::new(int_array),
1556 Arc::new(float_array),
1557 Arc::new(string_array),
1558 ];
1559
1560 let union_array = UnionArray::try_new(
1561 union_fields,
1562 type_ids,
1563 Some(offsets), children,
1565 )
1566 .unwrap();
1567
1568 let options = CastOptions {
1570 safe: false,
1571 ..Default::default()
1572 };
1573 let mut row_builder =
1574 make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1575 .unwrap();
1576
1577 let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1578 for i in 0..union_array.len() {
1579 row_builder.append_row(&mut variant_builder, i).unwrap();
1580 }
1581 let variant_array = variant_builder.build();
1582
1583 assert_eq!(variant_array.len(), 6);
1584 assert_eq!(variant_array.value(0), Variant::Int32(1));
1585 assert_eq!(variant_array.value(1), Variant::Double(3.2));
1586 assert_eq!(variant_array.value(2), Variant::from("hello"));
1587 assert_eq!(variant_array.value(3), Variant::Double(32.5));
1588 assert_eq!(variant_array.value(4), Variant::Int32(34));
1589 assert!(variant_array.is_null(5));
1590 }
1591
1592 #[test]
1593 fn test_union_sparse_type_ids_row_builder() {
1594 use arrow::array::{Int32Array, StringArray, UnionArray};
1595 use arrow::buffer::ScalarBuffer;
1596 use arrow::datatypes::{DataType, Field, UnionFields};
1597 use std::sync::Arc;
1598
1599 let int_array = Int32Array::from(vec![Some(42), None]);
1601 let string_array = StringArray::from(vec![None, Some("test")]);
1602 let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1603
1604 let union_fields = UnionFields::try_new(
1605 vec![1, 3], vec![
1607 Field::new("int_field", DataType::Int32, false),
1608 Field::new("string_field", DataType::Utf8, false),
1609 ],
1610 )
1611 .unwrap();
1612
1613 let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1614
1615 let union_array = UnionArray::try_new(
1616 union_fields,
1617 type_ids,
1618 None, children,
1620 )
1621 .unwrap();
1622
1623 let options = CastOptions {
1625 safe: false,
1626 ..Default::default()
1627 };
1628 let mut row_builder =
1629 make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1630 .unwrap();
1631
1632 let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1633 for i in 0..union_array.len() {
1634 row_builder.append_row(&mut variant_builder, i).unwrap();
1635 }
1636 let variant_array = variant_builder.build();
1637
1638 assert_eq!(variant_array.len(), 2);
1640
1641 assert_eq!(variant_array.value(0), Variant::Int32(42));
1643
1644 assert_eq!(variant_array.value(1), Variant::from("test"));
1646 }
1647
1648 #[test]
1649 fn test_decimal32_row_builder() {
1650 use arrow::array::Decimal32Array;
1651 use parquet_variant::VariantDecimal4;
1652
1653 let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1655 .with_precision_and_scale(9, 2)
1656 .unwrap();
1657
1658 test_row_builder_basic(
1659 &decimal_array,
1660 vec![
1661 Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1662 None,
1663 Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1664 ],
1665 );
1666 }
1667
1668 #[test]
1669 fn test_decimal128_row_builder() {
1670 use arrow::array::Decimal128Array;
1671 use parquet_variant::VariantDecimal16;
1672
1673 let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1675 .with_precision_and_scale(10, -2)
1676 .unwrap();
1677
1678 test_row_builder_basic(
1679 &decimal_array,
1680 vec![
1681 Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1682 None,
1683 Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1684 ],
1685 );
1686 }
1687
1688 #[test]
1689 fn test_decimal256_overflow_row_builder() {
1690 use arrow::array::Decimal256Array;
1691 use arrow::datatypes::i256;
1692
1693 let large_value = i256::from_i128(i128::MAX) + i256::from(1); let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1696 .with_precision_and_scale(76, 3)
1697 .unwrap();
1698
1699 test_row_builder_basic_with_options(
1700 &decimal_array,
1701 vec![
1702 Some(Variant::Null), Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1704 ],
1705 CastOptions::default(),
1706 );
1707 }
1708
1709 #[test]
1710 fn test_binary_row_builder() {
1711 use arrow::array::BinaryArray;
1712
1713 let binary_data = vec![
1714 Some(b"hello".as_slice()),
1715 None,
1716 Some(b"\x00\x01\x02\xFF".as_slice()),
1717 Some(b"".as_slice()), ];
1719 let binary_array = BinaryArray::from(binary_data);
1720
1721 test_row_builder_basic(
1722 &binary_array,
1723 vec![
1724 Some(Variant::from(b"hello".as_slice())),
1725 None,
1726 Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1727 Some(Variant::from([].as_slice())),
1728 ],
1729 );
1730 }
1731
1732 #[test]
1733 fn test_binary_view_row_builder() {
1734 use arrow::array::BinaryViewArray;
1735
1736 let binary_data = vec![
1737 Some(b"short".as_slice()),
1738 None,
1739 Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1740 ];
1741 let binary_view_array = BinaryViewArray::from(binary_data);
1742
1743 test_row_builder_basic(
1744 &binary_view_array,
1745 vec![
1746 Some(Variant::from(b"short".as_slice())),
1747 None,
1748 Some(Variant::from(
1749 b"this is a longer binary view that exceeds inline storage".as_slice(),
1750 )),
1751 ],
1752 );
1753 }
1754
1755 #[test]
1756 fn test_fixed_size_binary_row_builder() {
1757 use arrow::array::FixedSizeBinaryArray;
1758
1759 let binary_data = vec![
1760 Some([0x01, 0x02, 0x03, 0x04]),
1761 None,
1762 Some([0xFF, 0xFE, 0xFD, 0xFC]),
1763 ];
1764 let fixed_binary_array =
1765 FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1766 .unwrap();
1767
1768 test_row_builder_basic(
1769 &fixed_binary_array,
1770 vec![
1771 Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1772 None,
1773 Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1774 ],
1775 );
1776 }
1777
1778 #[test]
1779 fn test_utf8_view_row_builder() {
1780 use arrow::array::StringViewArray;
1781
1782 let string_data = vec![
1783 Some("short"),
1784 None,
1785 Some("this is a much longer string that will be stored out-of-line in the buffer"),
1786 ];
1787 let string_view_array = StringViewArray::from(string_data);
1788
1789 test_row_builder_basic(
1790 &string_view_array,
1791 vec![
1792 Some(Variant::from("short")),
1793 None,
1794 Some(Variant::from(
1795 "this is a much longer string that will be stored out-of-line in the buffer",
1796 )),
1797 ],
1798 );
1799 }
1800
1801 #[test]
1802 fn test_timestamp_second_row_builder() {
1803 use arrow::array::TimestampSecondArray;
1804
1805 let timestamp_data = vec![
1806 Some(1609459200), None,
1808 Some(1640995200), ];
1810 let timestamp_array = TimestampSecondArray::from(timestamp_data);
1811
1812 let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1813 let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1814
1815 test_row_builder_basic(
1816 ×tamp_array,
1817 vec![
1818 Some(Variant::from(expected_naive1)),
1819 None,
1820 Some(Variant::from(expected_naive2)),
1821 ],
1822 );
1823 }
1824
1825 #[test]
1826 fn test_timestamp_with_timezone_row_builder() {
1827 use arrow::array::TimestampMicrosecondArray;
1828 use chrono::DateTime;
1829
1830 let timestamp_data = vec![
1831 Some(1609459200000000), None,
1833 Some(1640995200000000), ];
1835 let timezone = "UTC".to_string();
1836 let timestamp_array =
1837 TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1838
1839 let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1840 let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1841
1842 test_row_builder_basic(
1843 ×tamp_array,
1844 vec![
1845 Some(Variant::from(expected_utc1)),
1846 None,
1847 Some(Variant::from(expected_utc2)),
1848 ],
1849 );
1850 }
1851
1852 #[test]
1853 fn test_timestamp_nanosecond_precision_row_builder() {
1854 use arrow::array::TimestampNanosecondArray;
1855
1856 let timestamp_data = vec![
1857 Some(1609459200123456789), None,
1859 Some(1609459200000000000), ];
1861 let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1862
1863 let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1864 .unwrap()
1865 .naive_utc();
1866 let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1867
1868 test_row_builder_basic(
1869 ×tamp_array,
1870 vec![
1871 Some(Variant::from(expected_with_nanos)),
1872 None,
1873 Some(Variant::from(expected_no_nanos)),
1874 ],
1875 );
1876 }
1877
1878 #[test]
1879 fn test_timestamp_millisecond_row_builder() {
1880 use arrow::array::TimestampMillisecondArray;
1881
1882 let timestamp_data = vec![
1883 Some(1609459200123), None,
1885 Some(1609459200000), ];
1887 let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1888
1889 let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1890 .unwrap()
1891 .naive_utc();
1892 let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1893
1894 test_row_builder_basic(
1895 ×tamp_array,
1896 vec![
1897 Some(Variant::from(expected_with_millis)),
1898 None,
1899 Some(Variant::from(expected_no_millis)),
1900 ],
1901 );
1902 }
1903
1904 #[test]
1905 fn test_date32_row_builder() {
1906 use arrow::array::Date32Array;
1907 use chrono::NaiveDate;
1908
1909 let date_data = vec![
1910 Some(0), None,
1912 Some(19723), Some(-719162), ];
1915 let date_array = Date32Array::from(date_data);
1916
1917 let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1918 let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1919 let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1920
1921 test_row_builder_basic(
1922 &date_array,
1923 vec![
1924 Some(Variant::from(expected_epoch)),
1925 None,
1926 Some(Variant::from(expected_2024)),
1927 Some(Variant::from(expected_min)),
1928 ],
1929 );
1930 }
1931
1932 #[test]
1933 fn test_date64_row_builder() {
1934 use arrow::array::Date64Array;
1935 use chrono::NaiveDate;
1936
1937 let date_data = vec![
1939 Some(0), None,
1941 Some(1704067200000), Some(86400000), ];
1944 let date_array = Date64Array::from(date_data);
1945
1946 let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1947 let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1948 let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1949
1950 test_row_builder_basic(
1951 &date_array,
1952 vec![
1953 Some(Variant::from(expected_epoch)),
1954 None,
1955 Some(Variant::from(expected_2024)),
1956 Some(Variant::from(expected_next_day)),
1957 ],
1958 );
1959 }
1960
1961 #[test]
1962 fn test_time32_second_row_builder() {
1963 use arrow::array::Time32SecondArray;
1964 use chrono::NaiveTime;
1965
1966 let time_data = vec![
1968 Some(0), None,
1970 Some(3661), Some(86399), ];
1973 let time_array = Time32SecondArray::from(time_data);
1974
1975 let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1976 let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1977 let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1978
1979 test_row_builder_basic(
1980 &time_array,
1981 vec![
1982 Some(Variant::from(expected_midnight)),
1983 None,
1984 Some(Variant::from(expected_time)),
1985 Some(Variant::from(expected_last)),
1986 ],
1987 );
1988 }
1989
1990 #[test]
1991 fn test_time32_millisecond_row_builder() {
1992 use arrow::array::Time32MillisecondArray;
1993 use chrono::NaiveTime;
1994
1995 let time_data = vec![
1997 Some(0), None,
1999 Some(3661123), Some(86399999), ];
2002 let time_array = Time32MillisecondArray::from(time_data);
2003
2004 let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
2005 let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
2006 let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
2007
2008 test_row_builder_basic(
2009 &time_array,
2010 vec![
2011 Some(Variant::from(expected_midnight)),
2012 None,
2013 Some(Variant::from(expected_time)),
2014 Some(Variant::from(expected_last)),
2015 ],
2016 );
2017 }
2018
2019 #[test]
2020 fn test_time64_microsecond_row_builder() {
2021 use arrow::array::Time64MicrosecondArray;
2022 use chrono::NaiveTime;
2023
2024 let time_data = vec![
2026 Some(0), None,
2028 Some(3661123456), Some(86399999999), ];
2031 let time_array = Time64MicrosecondArray::from(time_data);
2032
2033 let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
2034 let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2035 let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2036
2037 test_row_builder_basic(
2038 &time_array,
2039 vec![
2040 Some(Variant::from(expected_midnight)),
2041 None,
2042 Some(Variant::from(expected_time)),
2043 Some(Variant::from(expected_last)),
2044 ],
2045 );
2046 }
2047
2048 #[test]
2049 fn test_time64_nanosecond_row_builder() {
2050 use arrow::array::Time64NanosecondArray;
2051 use chrono::NaiveTime;
2052
2053 let time_data = vec![
2055 Some(0), None,
2057 Some(3661123456789), Some(86399999999999), ];
2060 let time_array = Time64NanosecondArray::from(time_data);
2061
2062 let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2063 let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2065 let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2066
2067 test_row_builder_basic(
2068 &time_array,
2069 vec![
2070 Some(Variant::from(expected_midnight)),
2071 None,
2072 Some(Variant::from(expected_time)),
2073 Some(Variant::from(expected_last)),
2074 ],
2075 );
2076 }
2077}