1use arrow::array::{
19 Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
20 GenericStringArray, ListLikeArray, OffsetSizeTrait, PrimitiveArray,
21};
22use arrow::compute::{CastOptions, kernels::cast};
23use arrow::datatypes::{
24 self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
25 DecimalType, RunEndIndexType,
26};
27use arrow::temporal_conversions::{as_date, as_datetime, as_time};
28use arrow_schema::{ArrowError, DataType, TimeUnit};
29use chrono::{DateTime, TimeZone, Utc};
30use parquet_variant::{
31 ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
32 VariantDecimal16, VariantDecimalType,
33};
34use std::collections::HashMap;
35
/// Row-oriented converter from one Arrow array to Variant values.
///
/// Each variant wraps a concrete builder for one Arrow data type (or one
/// type/parameter combination, e.g. a specific time unit or offset width).
/// Instances are created by `make_arrow_to_variant_row_builder`, and rows are
/// emitted one at a time through `append_row`.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null, boolean and primitive numeric types
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
    // Decimal types (Decimal256 has its own builder that narrows to 128 bits)
    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Temporal types, one variant per time unit
    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
    // Binary and string types
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // Nested types; these hold child row builders for their elements
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    // Encoded types that delegate to a builder over their values array
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
}
89
90impl<'a> ArrowToVariantRowBuilder<'a> {
91 pub fn append_row(
93 &mut self,
94 builder: &mut impl VariantBuilderExt,
95 index: usize,
96 ) -> Result<(), ArrowError> {
97 use ArrowToVariantRowBuilder::*;
98 match self {
99 Null(b) => b.append_row(builder, index),
100 Boolean(b) => b.append_row(builder, index),
101 PrimitiveInt8(b) => b.append_row(builder, index),
102 PrimitiveInt16(b) => b.append_row(builder, index),
103 PrimitiveInt32(b) => b.append_row(builder, index),
104 PrimitiveInt64(b) => b.append_row(builder, index),
105 PrimitiveUInt8(b) => b.append_row(builder, index),
106 PrimitiveUInt16(b) => b.append_row(builder, index),
107 PrimitiveUInt32(b) => b.append_row(builder, index),
108 PrimitiveUInt64(b) => b.append_row(builder, index),
109 PrimitiveFloat16(b) => b.append_row(builder, index),
110 PrimitiveFloat32(b) => b.append_row(builder, index),
111 PrimitiveFloat64(b) => b.append_row(builder, index),
112 Decimal32(b) => b.append_row(builder, index),
113 Decimal64(b) => b.append_row(builder, index),
114 Decimal128(b) => b.append_row(builder, index),
115 Decimal256(b) => b.append_row(builder, index),
116 TimestampSecond(b) => b.append_row(builder, index),
117 TimestampMillisecond(b) => b.append_row(builder, index),
118 TimestampMicrosecond(b) => b.append_row(builder, index),
119 TimestampNanosecond(b) => b.append_row(builder, index),
120 Date32(b) => b.append_row(builder, index),
121 Date64(b) => b.append_row(builder, index),
122 Time32Second(b) => b.append_row(builder, index),
123 Time32Millisecond(b) => b.append_row(builder, index),
124 Time64Microsecond(b) => b.append_row(builder, index),
125 Time64Nanosecond(b) => b.append_row(builder, index),
126 Binary(b) => b.append_row(builder, index),
127 LargeBinary(b) => b.append_row(builder, index),
128 BinaryView(b) => b.append_row(builder, index),
129 FixedSizeBinary(b) => b.append_row(builder, index),
130 Utf8(b) => b.append_row(builder, index),
131 LargeUtf8(b) => b.append_row(builder, index),
132 Utf8View(b) => b.append_row(builder, index),
133 List(b) => b.append_row(builder, index),
134 LargeList(b) => b.append_row(builder, index),
135 ListView(b) => b.append_row(builder, index),
136 LargeListView(b) => b.append_row(builder, index),
137 FixedSizeList(b) => b.append_row(builder, index),
138 Struct(b) => b.append_row(builder, index),
139 Map(b) => b.append_row(builder, index),
140 Union(b) => b.append_row(builder, index),
141 Dictionary(b) => b.append_row(builder, index),
142 RunEndEncodedInt16(b) => b.append_row(builder, index),
143 RunEndEncodedInt32(b) => b.append_row(builder, index),
144 RunEndEncodedInt64(b) => b.append_row(builder, index),
145 }
146 }
147}
148
149pub(crate) fn make_arrow_to_variant_row_builder<'a>(
151 data_type: &'a DataType,
152 array: &'a dyn Array,
153 options: &'a CastOptions,
154) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
155 use ArrowToVariantRowBuilder::*;
156 let builder =
157 match data_type {
158 DataType::Null => Null(NullArrowToVariantBuilder),
159 DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
160 DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
161 DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
162 DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
163 DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
164 DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
165 DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
166 DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
167 DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
168 DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
169 DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
170 DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
171 DataType::Decimal32(_, scale) => {
172 Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
173 }
174 DataType::Decimal64(_, scale) => {
175 Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
176 }
177 DataType::Decimal128(_, scale) => {
178 Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
179 }
180 DataType::Decimal256(_, scale) => {
181 Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
182 }
183 DataType::Timestamp(time_unit, time_zone) => {
184 match time_unit {
185 TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
186 array,
187 options,
188 time_zone.is_some(),
189 )),
190 TimeUnit::Millisecond => TimestampMillisecond(
191 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
192 ),
193 TimeUnit::Microsecond => TimestampMicrosecond(
194 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
195 ),
196 TimeUnit::Nanosecond => TimestampNanosecond(
197 TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
198 ),
199 }
200 }
201 DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
202 DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
203 DataType::Time32(time_unit) => match time_unit {
204 TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
205 TimeUnit::Millisecond => {
206 Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
207 }
208 _ => {
209 return Err(ArrowError::CastError(format!(
210 "Unsupported Time32 unit: {time_unit:?}"
211 )));
212 }
213 },
214 DataType::Time64(time_unit) => match time_unit {
215 TimeUnit::Microsecond => {
216 Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
217 }
218 TimeUnit::Nanosecond => {
219 Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
220 }
221 _ => {
222 return Err(ArrowError::CastError(format!(
223 "Unsupported Time64 unit: {time_unit:?}"
224 )));
225 }
226 },
227 DataType::Duration(_) | DataType::Interval(_) => {
228 return Err(ArrowError::InvalidArgumentError(
229 "Casting duration/interval types to Variant is not supported. \
230 The Variant format does not define duration/interval types."
231 .to_string(),
232 ));
233 }
234 DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
235 DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
236 DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
237 DataType::FixedSizeBinary(_) => {
238 FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
239 }
240 DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
241 DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
242 DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
243 DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
244 DataType::LargeList(_) => {
245 LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
246 }
247 DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
248 array.as_list_view(),
249 options,
250 )?),
251 DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
252 array.as_list_view(),
253 options,
254 )?),
255 DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
256 array.as_fixed_size_list(),
257 options,
258 )?),
259 DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
260 array.as_struct(),
261 options,
262 )?),
263 DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
264 DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
265 DataType::Dictionary(_, _) => {
266 Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
267 }
268 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
269 DataType::Int16 => {
270 RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
271 }
272 DataType::Int32 => {
273 RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
274 }
275 DataType::Int64 => {
276 RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
277 }
278 _ => {
279 return Err(ArrowError::CastError(format!(
280 "Unsupported run ends type: {}",
281 run_ends.data_type()
282 )));
283 }
284 },
285 };
286 Ok(builder)
287}
288
// Generates a row builder struct for a "leaf" Arrow array type.
//
// The invocation supplies:
// * the struct name, a lifetime, optional generic parameters (each with an
//   optional bound) and an optional single `where` predicate;
// * an optional braced list of extra fields, which become additional
//   arguments of `new` and are stored in the struct;
// * `|array| -> ArrayType { expr }`: how to downcast the `&dyn Array`
//   passed to `new` into a reference to the concrete array type;
// * an optional `|value| expr` transform applied to each non-null value. When
//   it is written as `|value| -> Option<T> expr`, the transform is fallible
//   and the generated code reads `self.options.safe`, so the builder must
//   declare an `options` field.
//
// The generated type has two methods:
// * `new(array, extra fields...) -> Self`
// * `append_row(&self, builder, index) -> Result<(), ArrowError>`
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
            // Mentions every generic parameter so none of them is unused.
            _phantom: std::marker::PhantomData<($( $generic, )*)>,
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Bring the extra fields into scope as references so the
                    // transform expression can use them by name. Not every
                    // transform uses every field, hence the `allow`.
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    let value = self.array.value(index);
                    // Optional transform: rebind the raw value under the
                    // caller-chosen name, then shadow `value` with the result.
                    $(
                        let $value = value;
                        let value = $value_transform;
                        // Fallible transform: `None` is an error when
                        // `options.safe` is false; otherwise a `Variant::Null`
                        // value is appended in place of the failed conversion.
                        $(
                            let Some(value): Option<$option_ty> = value else {
                                if !self.options.safe {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
388
// Row builder for boolean arrays; non-null values are appended as-is.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
393
// Row builder for any primitive array whose native value type converts
// directly into a `Variant`; no per-value transform is applied.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
399
// Row builder for decimal arrays whose native type matches that of the
// target variant decimal type `V`. Each value is converted with
// `V::try_new_with_signed_scale`; a failed conversion becomes `None` and is
// then handled according to `options.safe` by the generated `append_row`.
define_row_builder!(
    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
    where
        V: VariantDecimalType<Native = A::Native>,
    {
        options: &'a CastOptions<'a>,
        scale: i8,
    },
    |array| -> PrimitiveArray<A> { array.as_primitive() },
    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
);
411
// Row builder for Decimal256 arrays. Each value is first narrowed with
// `to_i128()` and then converted to `VariantDecimal16`; if either step fails
// the transform yields `None`, which the generated `append_row` handles
// according to `options.safe`.
define_row_builder!(
    struct Decimal256ArrowToVariantBuilder<'a> {
        options: &'a CastOptions<'a>,
        scale: i8,
    },
    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
    |value| -> Option<_> {
        let value = value.to_i128();
        value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
    }
);
424
// Row builder for timestamp arrays of any time unit. The raw value is decoded
// with `as_datetime`; a value it cannot decode yields `None`. When the Arrow
// type carries a time zone the decoded datetime is treated as UTC and emitted
// as a `DateTime<Utc>`; otherwise it is emitted as a naive datetime.
define_row_builder!(
    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
        options: &'a CastOptions<'a>,
        has_time_zone: bool,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        as_datetime::<T>(value).map(|naive_datetime| {
            if *has_time_zone {
                let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
                Variant::from(utc_dt)
            } else {
                Variant::from(naive_datetime)
            }
        })
    }
);
445
// Row builder for date arrays. The native value is widened to `i64` and
// decoded with `as_date`; a `None` result is handled according to
// `options.safe` by the generated `append_row`.
define_row_builder!(
    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
    where
        i64: From<T::Native>,
    {
        options: &'a CastOptions<'a>,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        let date_value = i64::from(value);
        as_date::<T>(date_value)
    }
);
459
// Row builder for time-of-day arrays. The native value is widened to `i64`
// and decoded with `as_time`; a `None` result is handled according to
// `options.safe` by the generated `append_row`.
define_row_builder!(
    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
    where
        i64: From<T::Native>,
    {
        options: &'a CastOptions<'a>,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        let time_value = i64::from(value);
        as_time::<T>(time_value)
    }
);
473
// Row builder for binary arrays, generic over the offset width `O`.
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);
478
// Row builder for binary view arrays.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);
483
// Row builder for fixed-size binary arrays.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
488
// Row builder for string arrays, generic over the offset width `O`.
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);
493
// Row builder for string view arrays.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
498
/// Row builder for `DataType::Null` arrays. It holds no array reference
/// because every row is null.
pub(crate) struct NullArrowToVariantBuilder;

impl NullArrowToVariantBuilder {
    /// Appends a null to `builder`; the row index is ignored.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        _index: usize,
    ) -> Result<(), ArrowError> {
        builder.append_null();
        Ok(())
    }
}
512
513pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
516 list_array: &'a L,
517 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
518}
519
520impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
521 pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
522 let values = array.values();
523 let values_builder =
524 make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
525
526 Ok(Self {
527 list_array: array,
528 values_builder: Box::new(values_builder),
529 })
530 }
531
532 fn append_row(
533 &mut self,
534 builder: &mut impl VariantBuilderExt,
535 index: usize,
536 ) -> Result<(), ArrowError> {
537 if self.list_array.is_null(index) {
538 builder.append_null();
539 return Ok(());
540 }
541
542 let range = self.list_array.element_range(index);
543
544 let mut list_builder = builder.try_new_list()?;
545 for value_index in range {
546 self.values_builder
547 .append_row(&mut list_builder, value_index)?;
548 }
549 list_builder.finish();
550 Ok(())
551 }
552}
553
554pub(crate) struct StructArrowToVariantBuilder<'a> {
556 struct_array: &'a arrow::array::StructArray,
557 field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
558}
559
560impl<'a> StructArrowToVariantBuilder<'a> {
561 pub(crate) fn new(
562 struct_array: &'a arrow::array::StructArray,
563 options: &'a CastOptions,
564 ) -> Result<Self, ArrowError> {
565 let mut field_builders = Vec::new();
566
567 for (field_name, field_array) in struct_array
569 .column_names()
570 .iter()
571 .zip(struct_array.columns().iter())
572 {
573 let field_builder = make_arrow_to_variant_row_builder(
574 field_array.data_type(),
575 field_array.as_ref(),
576 options,
577 )?;
578 field_builders.push((*field_name, field_builder));
579 }
580
581 Ok(Self {
582 struct_array,
583 field_builders,
584 })
585 }
586
587 fn append_row(
588 &mut self,
589 builder: &mut impl VariantBuilderExt,
590 index: usize,
591 ) -> Result<(), ArrowError> {
592 if self.struct_array.is_null(index) {
593 builder.append_null();
594 } else {
595 let mut obj_builder = builder.try_new_object()?;
597
598 for (field_name, row_builder) in &mut self.field_builders {
600 let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
601 row_builder.append_row(&mut field_builder, index)?;
602 }
603
604 obj_builder.finish();
605 }
606 Ok(())
607 }
608}
609
610pub(crate) struct MapArrowToVariantBuilder<'a> {
612 map_array: &'a arrow::array::MapArray,
613 key_strings: arrow::array::StringArray,
614 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
615}
616
617impl<'a> MapArrowToVariantBuilder<'a> {
618 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
619 let map_array = array.as_map();
620
621 let keys = cast(map_array.keys(), &DataType::Utf8)?;
623 let key_strings = keys.as_string::<i32>().clone();
624
625 let values = map_array.values();
627 let values_builder =
628 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
629
630 Ok(Self {
631 map_array,
632 key_strings,
633 values_builder: Box::new(values_builder),
634 })
635 }
636
637 fn append_row(
638 &mut self,
639 builder: &mut impl VariantBuilderExt,
640 index: usize,
641 ) -> Result<(), ArrowError> {
642 if self.map_array.is_null(index) {
644 builder.append_null();
645 return Ok(());
646 }
647
648 let offsets = self.map_array.offsets();
649 let start = offsets[index].as_usize();
650 let end = offsets[index + 1].as_usize();
651
652 let mut object_builder = builder.try_new_object()?;
654
655 for kv_index in start..end {
657 let key = self.key_strings.value(kv_index);
658 let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
659 self.values_builder
660 .append_row(&mut field_builder, kv_index)?;
661 }
662
663 object_builder.finish();
664 Ok(())
665 }
666}
667
668pub(crate) struct UnionArrowToVariantBuilder<'a> {
672 union_array: &'a arrow::array::UnionArray,
673 child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
674}
675
676impl<'a> UnionArrowToVariantBuilder<'a> {
677 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
678 let union_array = array.as_union();
679 let type_ids = union_array.type_ids();
680
681 let mut child_builders = HashMap::new();
683 for &type_id in type_ids {
684 let child_array = union_array.child(type_id);
685 let child_builder = make_arrow_to_variant_row_builder(
686 child_array.data_type(),
687 child_array.as_ref(),
688 options,
689 )?;
690 child_builders.insert(type_id, Box::new(child_builder));
691 }
692
693 Ok(Self {
694 union_array,
695 child_builders,
696 })
697 }
698
699 fn append_row(
700 &mut self,
701 builder: &mut impl VariantBuilderExt,
702 index: usize,
703 ) -> Result<(), ArrowError> {
704 let type_id = self.union_array.type_id(index);
705 let value_offset = self.union_array.value_offset(index);
706
707 match self.child_builders.get_mut(&type_id) {
709 Some(child_builder) => child_builder.append_row(builder, value_offset)?,
710 None => builder.append_null(),
711 }
712
713 Ok(())
714 }
715}
716
717pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
719 keys: &'a dyn Array, normalized_keys: Vec<usize>,
721 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
722}
723
724impl<'a> DictionaryArrowToVariantBuilder<'a> {
725 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
726 let dict_array = array.as_any_dictionary();
727 let values = dict_array.values();
728 let values_builder =
729 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
730
731 let normalized_keys = match values.len() {
733 0 => Vec::new(),
734 _ => dict_array.normalized_keys(),
735 };
736
737 Ok(Self {
738 keys: dict_array.keys(),
739 normalized_keys,
740 values_builder: Box::new(values_builder),
741 })
742 }
743
744 fn append_row(
745 &mut self,
746 builder: &mut impl VariantBuilderExt,
747 index: usize,
748 ) -> Result<(), ArrowError> {
749 if self.keys.is_null(index) {
750 builder.append_null();
751 } else {
752 let normalized_key = self.normalized_keys[index];
753 self.values_builder.append_row(builder, normalized_key)?;
754 }
755 Ok(())
756 }
757}
758
759pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
761 run_array: &'a arrow::array::RunArray<R>,
762 values_builder: Box<ArrowToVariantRowBuilder<'a>>,
763
764 run_ends: &'a [R::Native],
765 run_number: usize, run_start: usize, }
768
769impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
770 pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
771 let Some(run_array) = array.as_run_opt() else {
772 return Err(ArrowError::CastError("Expected RunArray".to_string()));
773 };
774
775 let values = run_array.values();
776 let values_builder =
777 make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
778
779 Ok(Self {
780 run_array,
781 values_builder: Box::new(values_builder),
782 run_ends: run_array.run_ends().values(),
783 run_number: 0,
784 run_start: 0,
785 })
786 }
787
788 fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
789 if index >= self.run_start {
790 let Some(run_end) = self.run_ends.get(self.run_number) else {
791 return Err(ArrowError::CastError(format!(
792 "Index {index} beyond run array"
793 )));
794 };
795 if index < run_end.as_usize() {
796 return Ok(());
797 }
798 if index == run_end.as_usize() {
799 self.run_number += 1;
800 self.run_start = run_end.as_usize();
801 return Ok(());
802 }
803 }
804
805 let run_number = self
807 .run_ends
808 .partition_point(|&run_end| run_end.as_usize() <= index);
809 if run_number >= self.run_ends.len() {
810 return Err(ArrowError::CastError(format!(
811 "Index {index} beyond run array"
812 )));
813 }
814 self.run_number = run_number;
815 self.run_start = match run_number {
816 0 => 0,
817 _ => self.run_ends[run_number - 1].as_usize(),
818 };
819 Ok(())
820 }
821
822 fn append_row(
823 &mut self,
824 builder: &mut impl VariantBuilderExt,
825 index: usize,
826 ) -> Result<(), ArrowError> {
827 self.set_run_for_index(index)?;
828
829 if self.run_array.values().is_null(self.run_number) {
831 builder.append_null();
832 return Ok(());
833 }
834
835 self.values_builder.append_row(builder, self.run_number)?;
837
838 Ok(())
839 }
840}
841
842#[cfg(test)]
843mod tests {
844 use super::*;
845 use crate::{VariantArray, VariantArrayBuilder};
846 use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
847 use arrow::datatypes::Int32Type;
848 use std::sync::Arc;
849
850 fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
852 execute_row_builder_test_with_options(
853 array,
854 CastOptions {
855 safe: false,
856 ..Default::default()
857 },
858 )
859 }
860
861 fn execute_row_builder_test_with_options(
863 array: &dyn Array,
864 options: CastOptions,
865 ) -> VariantArray {
866 let mut row_builder =
867 make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
868
869 let mut array_builder = VariantArrayBuilder::new(array.len());
870
871 for i in 0..array.len() {
873 row_builder.append_row(&mut array_builder, i).unwrap();
874 }
875
876 let variant_array = array_builder.build();
877 assert_eq!(variant_array.len(), array.len());
878 variant_array
879 }
880
881 fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
884 test_row_builder_basic_with_options(
885 array,
886 expected_values,
887 CastOptions {
888 safe: false,
889 ..Default::default()
890 },
891 );
892 }
893
894 fn test_row_builder_basic_with_options(
896 array: &dyn Array,
897 expected_values: Vec<Option<Variant>>,
898 options: CastOptions,
899 ) {
900 let variant_array = execute_row_builder_test_with_options(array, options);
901
902 for (i, expected) in expected_values.iter().enumerate() {
904 match expected {
905 Some(variant) => {
906 assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
907 }
908 None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
909 }
910 }
911 }
912
/// An Int32 array with a null in the middle converts to Int32 variants and a
/// null.
#[test]
fn test_primitive_row_builder() {
    let input = Int32Array::from(vec![Some(42), None, Some(100)]);
    let expected = vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))];
    test_row_builder_basic(&input, expected);
}
921
/// A Utf8 array with a null in the middle converts to string variants and a
/// null.
#[test]
fn test_string_row_builder() {
    let input = StringArray::from(vec![Some("hello"), None, Some("world")]);
    let expected = vec![
        Some(Variant::from("hello")),
        None,
        Some(Variant::from("world")),
    ];
    test_row_builder_basic(&input, expected);
}
934
/// A boolean array with a null in the middle converts to boolean variants
/// and a null.
#[test]
fn test_boolean_row_builder() {
    let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let expected = vec![Some(Variant::from(true)), None, Some(Variant::from(false))];
    test_row_builder_basic(&input, expected);
}
943
/// A struct array converts to objects; a null child value means the field is
/// not retrievable from the resulting object.
#[test]
fn test_struct_row_builder() {
    use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    let fields = vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ];
    let columns: Vec<ArrayRef> = vec![
        Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
        Arc::new(StringArray::from(vec![Some("Alice"), Some("Bob"), None])),
    ];
    let struct_array = StructArray::try_new(fields.into(), columns, None).unwrap();

    let variant_array = execute_row_builder_test(&struct_array);

    // (id, name) expected for each row; `None` means the field lookup fails.
    let expected_rows = [
        (Some(1), Some("Alice")),
        (None, Some("Bob")),
        (Some(3), None),
    ];
    for (row, (id, name)) in expected_rows.into_iter().enumerate() {
        let variant = variant_array.value(row);
        assert_eq!(variant.get_object_field("id"), id.map(Variant::from));
        assert_eq!(variant.get_object_field("name"), name.map(Variant::from));
    }
}
990
/// Sequential conversion of a run-end encoded array expands every run.
#[test]
fn test_run_end_encoded_row_builder() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    // Runs: rows 0..2 = "A", rows 2..5 = "B", rows 5..6 = "C".
    let values = StringArray::from(vec!["A", "B", "C"]);
    let run_ends = Int32Array::from(vec![2, 5, 6]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let variant_array = execute_row_builder_test(&run_array);

    let expected = ["A", "A", "B", "B", "B", "C"];
    for (row, text) in expected.into_iter().enumerate() {
        assert_eq!(variant_array.value(row), Variant::from(text));
    }
}
1013
/// Rows of a run-end encoded array can be requested in arbitrary order.
#[test]
fn test_run_end_encoded_random_access() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    // Runs: rows 0..2 = "A", rows 2..5 = "B", rows 5..6 = "C".
    let values = StringArray::from(vec!["A", "B", "C"]);
    let run_ends = Int32Array::from(vec![2, 5, 6]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut row_builder =
        make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();

    // (row to request, value expected for it), deliberately out of order.
    let probes = [(0, "A"), (5, "C"), (2, "B"), (4, "B"), (1, "A"), (3, "B")];
    for (index, expected) in probes {
        let mut array_builder = VariantArrayBuilder::new(1);
        row_builder.append_row(&mut array_builder, index).unwrap();
        let variant_array = array_builder.build();
        assert_eq!(variant_array.value(0), Variant::from(expected));
    }
}
1042
/// A null run value produces a null for every row of that run.
#[test]
fn test_run_end_encoded_with_nulls() {
    use arrow::array::{Int32Array, RunArray};
    use arrow::datatypes::Int32Type;

    // Runs: rows 0..2 = "A", rows 2..4 = null, rows 4..5 = "B".
    let values = StringArray::from(vec![Some("A"), None, Some("B")]);
    let run_ends = Int32Array::from(vec![2, 4, 5]);
    let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut row_builder =
        make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
    let mut array_builder = VariantArrayBuilder::new(5);
    for row in 0..5 {
        row_builder.append_row(&mut array_builder, row).unwrap();
    }

    let variant_array = array_builder.build();
    assert_eq!(variant_array.len(), 5);

    let expected = [Some("A"), Some("A"), None, None, Some("B")];
    for (row, entry) in expected.into_iter().enumerate() {
        match entry {
            Some(text) => assert_eq!(variant_array.value(row), Variant::from(text)),
            None => assert!(variant_array.is_null(row)),
        }
    }
}
1076
/// Each row of a dictionary array converts to the dictionary value its key
/// points at.
#[test]
fn test_dictionary_row_builder() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let dictionary_values = StringArray::from(vec!["apple", "banana", "cherry"]);
    let indices = Int32Array::from(vec![0, 1, 0, 2, 1]);
    let dictionary =
        DictionaryArray::<Int32Type>::try_new(indices, Arc::new(dictionary_values)).unwrap();

    let output = execute_row_builder_test(&dictionary);

    // Keys [0, 1, 0, 2, 1] resolved against the dictionary values.
    let expected = ["apple", "banana", "apple", "cherry", "banana"];
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(output.value(row), Variant::from(*want));
    }
}
1096
/// Null keys in a dictionary array produce null variants; non-null keys
/// resolve to their dictionary value.
#[test]
fn test_dictionary_with_nulls() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let dictionary_values = StringArray::from(vec!["x", "y", "z"]);
    let indices = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
    let dictionary =
        DictionaryArray::<Int32Type>::try_new(indices, Arc::new(dictionary_values)).unwrap();

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(dictionary.data_type(), &dictionary, &cast_options)
            .unwrap();

    let mut sink = VariantArrayBuilder::new(5);
    (0..5).for_each(|row| builder.append_row(&mut sink, row).unwrap());

    let output = sink.build();
    assert_eq!(output.len(), 5);

    // `None` marks rows whose key is null.
    let expected = [Some("x"), None, Some("y"), None, Some("z")];
    for (row, want) in expected.iter().enumerate() {
        match *want {
            Some(text) => assert_eq!(output.value(row), Variant::from(text)),
            None => assert!(output.is_null(row)),
        }
    }
}
1131
/// Rows of a dictionary array can be requested from one row builder in
/// arbitrary order and still resolve to the right dictionary value.
#[test]
fn test_dictionary_random_access() {
    use arrow::array::{DictionaryArray, Int32Array};
    use arrow::datatypes::Int32Type;

    let palette = StringArray::from(vec!["red", "green", "blue"]);
    let indices = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
    let dictionary = DictionaryArray::<Int32Type>::try_new(indices, Arc::new(palette)).unwrap();

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(dictionary.data_type(), &dictionary, &cast_options)
            .unwrap();

    // (row to read, value expected at that row), deliberately out of order.
    let probes = [
        (5, "blue"),
        (0, "red"),
        (3, "red"),
        (1, "green"),
        (4, "green"),
        (2, "blue"),
    ];

    for (row, expected) in probes {
        // A fresh single-slot output per probe keeps each lookup independent.
        let mut single = VariantArrayBuilder::new(1);
        builder.append_row(&mut single, row).unwrap();
        let built = single.build();
        assert_eq!(built.value(0), Variant::from(expected));
    }
}
1161
/// A dictionary whose values are structs yields, for each row, the struct its
/// key points at, exposed as a variant object with `id` and `name` fields.
#[test]
fn test_nested_dictionary() {
    use arrow::array::{DictionaryArray, Int32Array, StructArray};
    use arrow::datatypes::{Field, Int32Type};

    // Dictionary values: three {id, name} structs.
    let id_field = Arc::new(Field::new("id", DataType::Int32, false));
    let name_field = Arc::new(Field::new("name", DataType::Utf8, false));
    let ids: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let names: ArrayRef = Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"]));
    let people = StructArray::from(vec![(id_field, ids), (name_field, names)]);

    let indices = Int32Array::from(vec![0, 1, 0, 2, 1]);
    let dictionary = DictionaryArray::<Int32Type>::try_new(indices, Arc::new(people)).unwrap();

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(dictionary.data_type(), &dictionary, &cast_options)
            .unwrap();

    let mut sink = VariantArrayBuilder::new(5);
    (0..5).for_each(|row| builder.append_row(&mut sink, row).unwrap());

    let output = sink.build();
    assert_eq!(output.len(), 5);

    // (row, expected id, expected name) for the first three rows; rows 0 and 2
    // share dictionary key 0 and therefore the same struct.
    for (row, id, name) in [(0usize, 1i32, "Alice"), (1, 2, "Bob"), (2, 1, "Alice")] {
        let person = output.value(row);
        assert_eq!(person.get_object_field("id"), Some(Variant::from(id)));
        assert_eq!(person.get_object_field("name"), Some(Variant::from(name)));
    }
}
1228
/// List rows convert to variant lists with the same elements; a null list
/// becomes a null row and an empty list becomes an empty variant list.
#[test]
fn test_list_row_builder() {
    use arrow::array::ListArray;

    let rows = vec![
        Some(vec![Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        None,
        Some(vec![]),
    ];
    let input = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    let output = execute_row_builder_test(&input);

    // `None` marks the row that must come back as null.
    let expected: [Option<Vec<i32>>; 4] =
        [Some(vec![1, 2]), Some(vec![3, 4, 5]), None, Some(vec![])];

    for (row, want) in expected.iter().enumerate() {
        match want {
            None => assert!(output.is_null(row)),
            Some(items) => {
                let variant = output.value(row);
                let list = variant.as_list().unwrap();
                assert_eq!(list.len(), items.len());
                for (pos, item) in items.iter().enumerate() {
                    assert_eq!(list.get(pos), Some(Variant::from(*item)));
                }
            }
        }
    }
}
1267
/// Row indices passed to the builder are relative to a sliced list array:
/// row 0 of a slice starting at offset 1 is the second list of the parent.
#[test]
fn test_sliced_list_row_builder() {
    use arrow::array::ListArray;

    let rows = vec![
        Some(vec![Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        Some(vec![Some(6)]),
    ];
    let parent = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Keep only the middle list.
    let window = parent.slice(1, 1);

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(window.data_type(), &window, &cast_options).unwrap();

    let mut sink = VariantArrayBuilder::new(window.len());
    builder.append_row(&mut sink, 0).unwrap();
    let output = sink.build();

    assert_eq!(output.len(), 1);

    let only_row = output.value(0);
    let items = only_row.as_list().unwrap();
    assert_eq!(items.len(), 3);
    for (pos, want) in [3i32, 4, 5].iter().enumerate() {
        assert_eq!(items.get(pos), Some(Variant::from(*want)));
    }
}
1309
/// A list of lists converts to nested variant lists, and a null outer list
/// becomes a null row.
#[test]
fn test_nested_list_row_builder() {
    use arrow::array::ListArray;
    use arrow::datatypes::Field;

    // Inner lists: [1, 2] and [3].
    let inner_rows = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
    let inner_lists = ListArray::from_iter_primitive::<Int32Type, _, _>(inner_rows);

    // Field describing the outer list's items (each item is a List<Int32>).
    let int_item = Arc::new(Field::new("item", DataType::Int32, true));
    let list_item = Arc::new(Field::new("item", DataType::List(int_item), true));

    // Outer row 0 spans both inner lists; outer row 1 is empty and marked null.
    let offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
    let validity = arrow::buffer::NullBuffer::from(vec![true, false]);
    let nested = ListArray::new(list_item, offsets, Arc::new(inner_lists), Some(validity));

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(nested.data_type(), &nested, &cast_options).unwrap();

    let mut sink = VariantArrayBuilder::new(nested.len());
    (0..nested.len()).for_each(|row| builder.append_row(&mut sink, row).unwrap());
    let output = sink.build();

    assert_eq!(output.len(), 2);

    let top = output.value(0);
    let outer = top.as_list().unwrap();
    assert_eq!(outer.len(), 2);

    let expected_inner: [&[i32]; 2] = [&[1, 2], &[3]];
    for (pos, want) in expected_inner.iter().enumerate() {
        let element = outer.get(pos).unwrap();
        let inner = element.as_list().unwrap();
        assert_eq!(inner.len(), want.len());
        for (k, item) in want.iter().enumerate() {
            assert_eq!(inner.get(k), Some(Variant::from(*item)));
        }
    }

    assert!(output.is_null(1));
}
1369
/// Map rows convert to variant objects keyed by the map keys; an empty map
/// becomes an empty object and a null map becomes a null row.
#[test]
fn test_map_row_builder() {
    use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
    use arrow::buffer::{NullBuffer, OffsetBuffer};
    use arrow::datatypes::{DataType, Field, Fields};
    use std::sync::Arc;

    // Flattened key/value entries shared by all map rows.
    let entry_keys = StringArray::from(vec!["key1", "key2", "key3"]);
    let entry_values = Int32Array::from(vec![1, 2, 3]);
    let entry_fields = Fields::from(vec![
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Int32, true),
    ]);
    let entries = StructArray::new(
        entry_fields.clone(),
        vec![Arc::new(entry_keys), Arc::new(entry_values)],
        None,
    );

    // Four rows: one entry, no entries, no entries (marked null), two entries.
    let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
    let validity = Some(NullBuffer::from(vec![true, true, false, true]));

    let entries_field = Arc::new(Field::new(
        "entries",
        DataType::Struct(entry_fields),
        false,
    ));

    let input = MapArray::try_new(entries_field, offsets, entries, validity, false).unwrap();

    let output = execute_row_builder_test(&input);

    // `None` marks the row that must come back as null.
    let expected: [Option<Vec<(&str, i32)>>; 4] = [
        Some(vec![("key1", 1)]),
        Some(vec![]),
        None,
        Some(vec![("key2", 2), ("key3", 3)]),
    ];

    for (row, want) in expected.iter().enumerate() {
        match want {
            None => assert!(output.is_null(row)),
            Some(pairs) => {
                let variant = output.value(row);
                let object = variant.as_object().unwrap();
                assert_eq!(object.len(), pairs.len());
                for (key, value) in pairs {
                    assert_eq!(object.get(key), Some(Variant::from(*value)));
                }
            }
        }
    }
}
1440
/// Each row of a sparse union converts to the value of the child selected by
/// its type id; a null in the selected child becomes a null row.
#[test]
fn test_union_sparse_row_builder() {
    use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    // Sparse layout: every child has one slot per union row.
    let ints = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
    let floats = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
    let strings = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
    let type_ids: ScalarBuffer<i8> = [0, 1, 2, 1, 0, 0].into_iter().collect();

    let fields = UnionFields::from_fields(vec![
        Field::new("int_field", DataType::Int32, false),
        Field::new("float_field", DataType::Float64, false),
        Field::new("string_field", DataType::Utf8, false),
    ]);

    let children: Vec<Arc<dyn Array>> =
        vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

    // No offsets buffer: this is what makes the union sparse.
    let input = UnionArray::try_new(fields, type_ids, None, children).unwrap();

    let output = execute_row_builder_test(&input);

    // `None` marks the row that must come back as null.
    let expected = vec![
        Some(Variant::Int32(1)),
        Some(Variant::Double(3.2)),
        Some(Variant::from("hello")),
        Some(Variant::Double(32.5)),
        Some(Variant::Int32(34)),
        None,
    ];
    for (row, want) in expected.into_iter().enumerate() {
        match want {
            Some(variant) => assert_eq!(output.value(row), variant),
            None => assert!(output.is_null(row)),
        }
    }
}
1482
/// Each row of a dense union converts to the child value addressed by its
/// type id and offset; a null in the addressed slot becomes a null row.
#[test]
fn test_union_dense_row_builder() {
    use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    // Dense layout: children hold only their own values, addressed via offsets.
    let ints = Int32Array::from(vec![Some(1), Some(34), None]);
    let floats = Float64Array::from(vec![3.2, 32.5]);
    let strings = StringArray::from(vec!["hello"]);
    let type_ids: ScalarBuffer<i8> = [0, 1, 2, 1, 0, 0].into_iter().collect();
    let offsets: ScalarBuffer<i32> = [0, 0, 0, 1, 1, 2].into_iter().collect();

    let fields = UnionFields::from_fields(vec![
        Field::new("int_field", DataType::Int32, false),
        Field::new("float_field", DataType::Float64, false),
        Field::new("string_field", DataType::Utf8, false),
    ]);

    let children: Vec<Arc<dyn Array>> =
        vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

    let input = UnionArray::try_new(fields, type_ids, Some(offsets), children).unwrap();

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(input.data_type(), &input, &cast_options).unwrap();

    let mut sink = VariantArrayBuilder::new(input.len());
    (0..input.len()).for_each(|row| builder.append_row(&mut sink, row).unwrap());
    let output = sink.build();

    assert_eq!(output.len(), 6);

    // `None` marks the row that must come back as null.
    let expected = vec![
        Some(Variant::Int32(1)),
        Some(Variant::Double(3.2)),
        Some(Variant::from("hello")),
        Some(Variant::Double(32.5)),
        Some(Variant::Int32(34)),
        None,
    ];
    for (row, want) in expected.into_iter().enumerate() {
        match want {
            Some(variant) => assert_eq!(output.value(row), variant),
            None => assert!(output.is_null(row)),
        }
    }
}
1542
/// Union type ids need not be contiguous or start at zero: with ids 1 and 3,
/// each row still resolves to the child registered under its id.
#[test]
fn test_union_sparse_type_ids_row_builder() {
    use arrow::array::{Int32Array, StringArray, UnionArray};
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::{DataType, Field, UnionFields};
    use std::sync::Arc;

    let ints = Int32Array::from(vec![Some(42), None]);
    let strings = StringArray::from(vec![None, Some("test")]);
    let type_ids: ScalarBuffer<i8> = [1, 3].into_iter().collect();

    // Type id 1 -> int_field, type id 3 -> string_field.
    let fields = UnionFields::try_new(
        vec![1, 3],
        vec![
            Field::new("int_field", DataType::Int32, false),
            Field::new("string_field", DataType::Utf8, false),
        ],
    )
    .unwrap();

    let children: Vec<Arc<dyn Array>> = vec![Arc::new(ints), Arc::new(strings)];

    let input = UnionArray::try_new(fields, type_ids, None, children).unwrap();

    let cast_options = CastOptions {
        safe: false,
        ..Default::default()
    };
    let mut builder =
        make_arrow_to_variant_row_builder(input.data_type(), &input, &cast_options).unwrap();

    let mut sink = VariantArrayBuilder::new(input.len());
    (0..input.len()).for_each(|row| builder.append_row(&mut sink, row).unwrap());
    let output = sink.build();

    assert_eq!(output.len(), 2);

    let expected = vec![Variant::Int32(42), Variant::from("test")];
    for (row, want) in expected.into_iter().enumerate() {
        assert_eq!(output.value(row), want);
    }
}
1598
/// Decimal32 values with scale 2 convert to `VariantDecimal4` values carrying
/// the same unscaled integer and scale; nulls stay null.
#[test]
fn test_decimal32_row_builder() {
    use arrow::array::Decimal32Array;
    use parquet_variant::VariantDecimal4;

    let input = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
        .with_precision_and_scale(9, 2)
        .unwrap();

    let positive = VariantDecimal4::try_new(1234, 2).unwrap();
    let negative = VariantDecimal4::try_new(-5678, 2).unwrap();
    let expected = vec![
        Some(Variant::from(positive)),
        None,
        Some(Variant::from(negative)),
    ];

    test_row_builder_basic(&input, expected);
}
1618
/// Decimal128 values with a negative scale (-2) are expected to come out as
/// scale-0 `VariantDecimal16` values multiplied by 100; nulls stay null.
#[test]
fn test_decimal128_row_builder() {
    use arrow::array::Decimal128Array;
    use parquet_variant::VariantDecimal16;

    let input = Decimal128Array::from(vec![Some(123), None, Some(456)])
        .with_precision_and_scale(10, -2)
        .unwrap();

    // 123 at scale -2 is 12300; 456 at scale -2 is 45600.
    let first = VariantDecimal16::try_new(12300, 0).unwrap();
    let last = VariantDecimal16::try_new(45600, 0).unwrap();
    let expected = vec![Some(Variant::from(first)), None, Some(Variant::from(last))];

    test_row_builder_basic(&input, expected);
}
1638
/// With default cast options, a Decimal256 value just above `i128::MAX` is
/// expected to come out as `Variant::Null`, while a small value converts to a
/// `VariantDecimal16` with the array's scale.
#[test]
fn test_decimal256_overflow_row_builder() {
    use arrow::array::Decimal256Array;
    use arrow::datatypes::i256;

    // One past the largest i128.
    let too_large = i256::from_i128(i128::MAX) + i256::from(1);
    let small = i256::from(123);
    let input = Decimal256Array::from(vec![Some(too_large), Some(small)])
        .with_precision_and_scale(76, 3)
        .unwrap();

    let expected = vec![
        Some(Variant::Null),
        Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
    ];

    test_row_builder_basic_with_options(&input, expected, CastOptions::default());
}
1659
/// Binary values convert to binary variants byte-for-byte, including
/// non-UTF-8 bytes and the empty slice; nulls stay null.
#[test]
fn test_binary_row_builder() {
    use arrow::array::BinaryArray;

    let text: &[u8] = b"hello";
    let raw: &[u8] = b"\x00\x01\x02\xFF";
    let empty: &[u8] = b"";

    let input = BinaryArray::from(vec![Some(text), None, Some(raw), Some(empty)]);

    let expected = vec![
        Some(Variant::from(text)),
        None,
        Some(Variant::from(raw)),
        Some(Variant::from(empty)),
    ];

    test_row_builder_basic(&input, expected);
}
1682
/// BinaryView values, both a short one and a much longer one, convert to
/// binary variants with identical bytes; nulls stay null.
#[test]
fn test_binary_view_row_builder() {
    use arrow::array::BinaryViewArray;

    let short: &[u8] = b"short";
    let long: &[u8] = b"this is a longer binary view that exceeds inline storage";

    let input = BinaryViewArray::from(vec![Some(short), None, Some(long)]);

    let expected = vec![Some(Variant::from(short)), None, Some(Variant::from(long))];

    test_row_builder_basic(&input, expected);
}
1705
/// Fixed-size (4-byte) binary values convert to binary variants with the same
/// bytes; nulls stay null.
#[test]
fn test_fixed_size_binary_row_builder() {
    use arrow::array::FixedSizeBinaryArray;

    let low: [u8; 4] = [0x01, 0x02, 0x03, 0x04];
    let high: [u8; 4] = [0xFF, 0xFE, 0xFD, 0xFC];

    let sparse = vec![Some(low), None, Some(high)];
    let input =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(sparse.into_iter(), 4).unwrap();

    let expected = vec![
        Some(Variant::from(low.as_slice())),
        None,
        Some(Variant::from(high.as_slice())),
    ];

    test_row_builder_basic(&input, expected);
}
1728
/// Utf8View values, both a short one and a much longer one, convert to string
/// variants with identical text; nulls stay null.
#[test]
fn test_utf8_view_row_builder() {
    use arrow::array::StringViewArray;

    let short = "short";
    let long = "this is a much longer string that will be stored out-of-line in the buffer";

    let input = StringViewArray::from(vec![Some(short), None, Some(long)]);

    let expected = vec![Some(Variant::from(short)), None, Some(Variant::from(long))];

    test_row_builder_basic(&input, expected);
}
1751
/// Second-resolution timestamps without a timezone are expected to convert to
/// naive (timezone-less) datetime variants; nulls stay null.
#[test]
fn test_timestamp_second_row_builder() {
    use arrow::array::TimestampSecondArray;

    let timestamp_data = vec![
        Some(1609459200), // 2021-01-01 00:00:00 UTC
        None,
        Some(1640995200), // 2022-01-01 00:00:00 UTC
    ];
    let timestamp_array = TimestampSecondArray::from(timestamp_data);

    // No timezone on the array, so the expected values are naive datetimes.
    let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
    let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_naive1)),
            None,
            Some(Variant::from(expected_naive2)),
        ],
    );
}
1775
/// Microsecond timestamps tagged with the "UTC" timezone are expected to
/// convert to UTC-aware datetime variants; nulls stay null.
#[test]
fn test_timestamp_with_timezone_row_builder() {
    use arrow::array::TimestampMicrosecondArray;
    use chrono::DateTime;

    let timestamp_data = vec![
        Some(1609459200000000), // 2021-01-01 00:00:00 UTC, in microseconds
        None,
        Some(1640995200000000), // 2022-01-01 00:00:00 UTC, in microseconds
    ];
    let timezone = "UTC".to_string();
    let timestamp_array =
        TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);

    // The array carries a timezone, so the expected values stay timezone-aware
    // (no `naive_utc()` conversion here).
    let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
    let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_utc1)),
            None,
            Some(Variant::from(expected_utc2)),
        ],
    );
}
1802
/// Nanosecond timestamps without a timezone are expected to keep their full
/// sub-second precision when converted to naive datetime variants.
#[test]
fn test_timestamp_nanosecond_precision_row_builder() {
    use arrow::array::TimestampNanosecondArray;

    let timestamp_data = vec![
        Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
        None,
        Some(1609459200000000000), // 2021-01-01 00:00:00 UTC
    ];
    let timestamp_array = TimestampNanosecondArray::from(timestamp_data);

    // The first expectation retains all nine fractional digits.
    let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
        .unwrap()
        .naive_utc();
    let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_with_nanos)),
            None,
            Some(Variant::from(expected_no_nanos)),
        ],
    );
}
1828
/// Millisecond timestamps without a timezone are expected to convert to naive
/// datetime variants with the millisecond part preserved; nulls stay null.
#[test]
fn test_timestamp_millisecond_row_builder() {
    use arrow::array::TimestampMillisecondArray;

    let timestamp_data = vec![
        Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
        None,
        Some(1609459200000), // 2021-01-01 00:00:00 UTC
    ];
    let timestamp_array = TimestampMillisecondArray::from(timestamp_data);

    // 123 ms expressed as the nanosecond argument of `from_timestamp`.
    let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
        .unwrap()
        .naive_utc();
    let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();

    test_row_builder_basic(
        &timestamp_array,
        vec![
            Some(Variant::from(expected_with_millis)),
            None,
            Some(Variant::from(expected_no_millis)),
        ],
    );
}
1854
/// Date32 values (day counts relative to 1970-01-01) convert to calendar-date
/// variants, including a date far before the epoch; nulls stay null.
#[test]
fn test_date32_row_builder() {
    use arrow::array::Date32Array;
    use chrono::NaiveDate;

    let ymd = |year: i32, month: u32, day: u32| NaiveDate::from_ymd_opt(year, month, day).unwrap();

    // Day offsets from the epoch: 0, null, 2024-01-01, 0001-01-01.
    let day_offsets = vec![Some(0), None, Some(19723), Some(-719162)];
    let input = Date32Array::from(day_offsets);

    let expected = vec![
        Some(Variant::from(ymd(1970, 1, 1))),
        None,
        Some(Variant::from(ymd(2024, 1, 1))),
        Some(Variant::from(ymd(1, 1, 1))),
    ];

    test_row_builder_basic(&input, expected);
}
1882
/// Date64 values (millisecond counts relative to 1970-01-01) convert to
/// calendar-date variants; nulls stay null.
#[test]
fn test_date64_row_builder() {
    use arrow::array::Date64Array;
    use chrono::NaiveDate;

    let ymd = |year: i32, month: u32, day: u32| NaiveDate::from_ymd_opt(year, month, day).unwrap();

    // Millisecond offsets from the epoch: 0, null, 2024-01-01, exactly one day.
    let milli_offsets = vec![Some(0), None, Some(1704067200000), Some(86400000)];
    let input = Date64Array::from(milli_offsets);

    let expected = vec![
        Some(Variant::from(ymd(1970, 1, 1))),
        None,
        Some(Variant::from(ymd(2024, 1, 1))),
        Some(Variant::from(ymd(1970, 1, 2))),
    ];

    test_row_builder_basic(&input, expected);
}
1911
/// Time32 (seconds since midnight) values convert to time-of-day variants;
/// nulls stay null.
#[test]
fn test_time32_second_row_builder() {
    use arrow::array::Time32SecondArray;
    use chrono::NaiveTime;

    let hms = |hour: u32, min: u32, sec: u32| NaiveTime::from_hms_opt(hour, min, sec).unwrap();

    // Seconds since midnight: 00:00:00, null, 01:01:01, 23:59:59.
    let seconds = vec![Some(0), None, Some(3661), Some(86399)];
    let input = Time32SecondArray::from(seconds);

    let expected = vec![
        Some(Variant::from(hms(0, 0, 0))),
        None,
        Some(Variant::from(hms(1, 1, 1))),
        Some(Variant::from(hms(23, 59, 59))),
    ];

    test_row_builder_basic(&input, expected);
}
1940
/// Time32 (milliseconds since midnight) values convert to time-of-day variants
/// with the millisecond part preserved; nulls stay null.
#[test]
fn test_time32_millisecond_row_builder() {
    use arrow::array::Time32MillisecondArray;
    use chrono::NaiveTime;

    let hms_milli = |hour: u32, min: u32, sec: u32, milli: u32| {
        NaiveTime::from_hms_milli_opt(hour, min, sec, milli).unwrap()
    };

    // Milliseconds since midnight: 00:00:00.000, null, 01:01:01.123, 23:59:59.999.
    let millis = vec![Some(0), None, Some(3661123), Some(86399999)];
    let input = Time32MillisecondArray::from(millis);

    let expected = vec![
        Some(Variant::from(hms_milli(0, 0, 0, 0))),
        None,
        Some(Variant::from(hms_milli(1, 1, 1, 123))),
        Some(Variant::from(hms_milli(23, 59, 59, 999))),
    ];

    test_row_builder_basic(&input, expected);
}
1969
/// Time64 (microseconds since midnight) values convert to time-of-day variants
/// with the microsecond part preserved; nulls stay null.
#[test]
fn test_time64_microsecond_row_builder() {
    use arrow::array::Time64MicrosecondArray;
    use chrono::NaiveTime;

    let hms_micro = |hour: u32, min: u32, sec: u32, micro: u32| {
        NaiveTime::from_hms_micro_opt(hour, min, sec, micro).unwrap()
    };

    // Microseconds since midnight:
    // 00:00:00.000000, null, 01:01:01.123456, 23:59:59.999999.
    let micros = vec![Some(0), None, Some(3661123456), Some(86399999999)];
    let input = Time64MicrosecondArray::from(micros);

    let expected = vec![
        Some(Variant::from(hms_micro(0, 0, 0, 0))),
        None,
        Some(Variant::from(hms_micro(1, 1, 1, 123456))),
        Some(Variant::from(hms_micro(23, 59, 59, 999999))),
    ];

    test_row_builder_basic(&input, expected);
}
1998
/// Time64 (nanoseconds since midnight) values convert to time-of-day variants.
/// The expected values carry only microsecond precision: the last three
/// nanosecond digits of each input are not expected in the output.
#[test]
fn test_time64_nanosecond_row_builder() {
    use arrow::array::Time64NanosecondArray;
    use chrono::NaiveTime;

    // Nanoseconds since midnight:
    // 00:00:00.000000000, null, 01:01:01.123456789, 23:59:59.999999999.
    let nanos = vec![Some(0), None, Some(3661123456789), Some(86399999999999)];
    let input = Time64NanosecondArray::from(nanos);

    let midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
    // Sub-microsecond digits (…789 and …999) are dropped in the expectations.
    let early = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
    let latest = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();

    let expected = vec![
        Some(Variant::from(midnight)),
        None,
        Some(Variant::from(early)),
        Some(Variant::from(latest)),
    ];

    test_row_builder_basic(&input, expected);
}
2028}