parquet_variant_compute/
arrow_to_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::type_conversion::{CastOptions, decimal_to_variant_decimal};
19use arrow::array::{
20    Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
21    GenericStringArray, OffsetSizeTrait, PrimitiveArray,
22};
23use arrow::compute::kernels::cast;
24use arrow::datatypes::{
25    ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType, Date32Type,
26    Date64Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
27    RunEndIndexType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
28    Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
29    TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
30};
31use arrow::temporal_conversions::{as_date, as_datetime, as_time};
32use arrow_schema::{ArrowError, DataType, TimeUnit};
33use chrono::{DateTime, TimeZone, Utc};
34use parquet_variant::{
35    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
36    VariantDecimal16,
37};
38use std::collections::HashMap;
39use std::ops::Range;
40
41// ============================================================================
42// Row-oriented builders for efficient Arrow-to-Variant conversion
43// ============================================================================
44
/// Row builder for converting Arrow arrays to VariantArray row by row
///
/// Each variant wraps the concrete builder for one Arrow data type (or one type/parameter
/// combination, such as a specific time unit or offset width). Instances are created by
/// `make_arrow_to_variant_row_builder`, and `append_row` forwards to the wrapped builder.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null and boolean
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    // Integers and floats: one variant per Arrow primitive type
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, Int64Type>),
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, UInt64Type>),
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, Float64Type>),
    // Decimals: one variant per storage width
    Decimal32(Decimal32ArrowToVariantBuilder<'a>),
    Decimal64(Decimal64ArrowToVariantBuilder<'a>),
    Decimal128(Decimal128ArrowToVariantBuilder<'a>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Temporal types: one variant per time unit
    TimestampSecond(TimestampArrowToVariantBuilder<'a, TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, TimestampNanosecondType>),
    Date32(DateArrowToVariantBuilder<'a, Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, Date64Type>),
    Time32Second(TimeArrowToVariantBuilder<'a, Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, Time64NanosecondType>),
    // Binary and string types, including their 64-bit-offset and view layouts
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // List-like types: all share `ListArrowToVariantBuilder`, parameterized by the array type
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    // Other nested and encoded types
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    // Run-end encoded arrays: one variant per run-end index type
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, Int64Type>),
}
94
95impl<'a> ArrowToVariantRowBuilder<'a> {
96    /// Appends a single row at the given index to the supplied builder.
97    pub fn append_row(
98        &mut self,
99        builder: &mut impl VariantBuilderExt,
100        index: usize,
101    ) -> Result<(), ArrowError> {
102        use ArrowToVariantRowBuilder::*;
103        match self {
104            Null(b) => b.append_row(builder, index),
105            Boolean(b) => b.append_row(builder, index),
106            PrimitiveInt8(b) => b.append_row(builder, index),
107            PrimitiveInt16(b) => b.append_row(builder, index),
108            PrimitiveInt32(b) => b.append_row(builder, index),
109            PrimitiveInt64(b) => b.append_row(builder, index),
110            PrimitiveUInt8(b) => b.append_row(builder, index),
111            PrimitiveUInt16(b) => b.append_row(builder, index),
112            PrimitiveUInt32(b) => b.append_row(builder, index),
113            PrimitiveUInt64(b) => b.append_row(builder, index),
114            PrimitiveFloat16(b) => b.append_row(builder, index),
115            PrimitiveFloat32(b) => b.append_row(builder, index),
116            PrimitiveFloat64(b) => b.append_row(builder, index),
117            Decimal32(b) => b.append_row(builder, index),
118            Decimal64(b) => b.append_row(builder, index),
119            Decimal128(b) => b.append_row(builder, index),
120            Decimal256(b) => b.append_row(builder, index),
121            TimestampSecond(b) => b.append_row(builder, index),
122            TimestampMillisecond(b) => b.append_row(builder, index),
123            TimestampMicrosecond(b) => b.append_row(builder, index),
124            TimestampNanosecond(b) => b.append_row(builder, index),
125            Date32(b) => b.append_row(builder, index),
126            Date64(b) => b.append_row(builder, index),
127            Time32Second(b) => b.append_row(builder, index),
128            Time32Millisecond(b) => b.append_row(builder, index),
129            Time64Microsecond(b) => b.append_row(builder, index),
130            Time64Nanosecond(b) => b.append_row(builder, index),
131            Binary(b) => b.append_row(builder, index),
132            LargeBinary(b) => b.append_row(builder, index),
133            BinaryView(b) => b.append_row(builder, index),
134            FixedSizeBinary(b) => b.append_row(builder, index),
135            Utf8(b) => b.append_row(builder, index),
136            LargeUtf8(b) => b.append_row(builder, index),
137            Utf8View(b) => b.append_row(builder, index),
138            List(b) => b.append_row(builder, index),
139            LargeList(b) => b.append_row(builder, index),
140            ListView(b) => b.append_row(builder, index),
141            LargeListView(b) => b.append_row(builder, index),
142            FixedSizeList(b) => b.append_row(builder, index),
143            Struct(b) => b.append_row(builder, index),
144            Map(b) => b.append_row(builder, index),
145            Union(b) => b.append_row(builder, index),
146            Dictionary(b) => b.append_row(builder, index),
147            RunEndEncodedInt16(b) => b.append_row(builder, index),
148            RunEndEncodedInt32(b) => b.append_row(builder, index),
149            RunEndEncodedInt64(b) => b.append_row(builder, index),
150        }
151    }
152}
153
154/// Factory function to create the appropriate row builder for a given DataType
155pub(crate) fn make_arrow_to_variant_row_builder<'a>(
156    data_type: &'a DataType,
157    array: &'a dyn Array,
158    options: &'a CastOptions,
159) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
160    use ArrowToVariantRowBuilder::*;
161    let builder =
162        match data_type {
163            DataType::Null => Null(NullArrowToVariantBuilder),
164            DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
165            DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
166            DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
167            DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
168            DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
169            DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
170            DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
171            DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
172            DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
173            DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
174            DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
175            DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
176            DataType::Decimal32(_, scale) => {
177                Decimal32(Decimal32ArrowToVariantBuilder::new(array, options, *scale))
178            }
179            DataType::Decimal64(_, scale) => {
180                Decimal64(Decimal64ArrowToVariantBuilder::new(array, options, *scale))
181            }
182            DataType::Decimal128(_, scale) => {
183                Decimal128(Decimal128ArrowToVariantBuilder::new(array, options, *scale))
184            }
185            DataType::Decimal256(_, scale) => {
186                Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
187            }
188            DataType::Timestamp(time_unit, time_zone) => {
189                match time_unit {
190                    TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
191                        array,
192                        options,
193                        time_zone.is_some(),
194                    )),
195                    TimeUnit::Millisecond => TimestampMillisecond(
196                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
197                    ),
198                    TimeUnit::Microsecond => TimestampMicrosecond(
199                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
200                    ),
201                    TimeUnit::Nanosecond => TimestampNanosecond(
202                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
203                    ),
204                }
205            }
206            DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
207            DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
208            DataType::Time32(time_unit) => match time_unit {
209                TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
210                TimeUnit::Millisecond => {
211                    Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
212                }
213                _ => {
214                    return Err(ArrowError::CastError(format!(
215                        "Unsupported Time32 unit: {time_unit:?}"
216                    )));
217                }
218            },
219            DataType::Time64(time_unit) => match time_unit {
220                TimeUnit::Microsecond => {
221                    Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
222                }
223                TimeUnit::Nanosecond => {
224                    Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
225                }
226                _ => {
227                    return Err(ArrowError::CastError(format!(
228                        "Unsupported Time64 unit: {time_unit:?}"
229                    )));
230                }
231            },
232            DataType::Duration(_) | DataType::Interval(_) => {
233                return Err(ArrowError::InvalidArgumentError(
234                    "Casting duration/interval types to Variant is not supported. \
235                    The Variant format does not define duration/interval types."
236                        .to_string(),
237                ));
238            }
239            DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
240            DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
241            DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
242            DataType::FixedSizeBinary(_) => {
243                FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
244            }
245            DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
246            DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
247            DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
248            DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
249            DataType::LargeList(_) => {
250                LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
251            }
252            DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
253                array.as_list_view(),
254                options,
255            )?),
256            DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
257                array.as_list_view(),
258                options,
259            )?),
260            DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
261                array.as_fixed_size_list(),
262                options,
263            )?),
264            DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
265                array.as_struct(),
266                options,
267            )?),
268            DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
269            DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
270            DataType::Dictionary(_, _) => {
271                Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
272            }
273            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
274                DataType::Int16 => {
275                    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
276                }
277                DataType::Int32 => {
278                    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
279                }
280                DataType::Int64 => {
281                    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
282                }
283                _ => {
284                    return Err(ArrowError::CastError(format!(
285                        "Unsupported run ends type: {}",
286                        run_ends.data_type()
287                    )));
288                }
289            },
290        };
291    Ok(builder)
292}
293
/// Macro to define (possibly generic) row builders with consistent structure and behavior.
///
/// Each invocation generates a `pub(crate)` struct holding a typed reference to the source array
/// (plus any extra fields), a `new` constructor that downcasts a `&dyn Array` using the supplied
/// init expression, and an `append_row` method that appends null for null slots and the
/// (optionally transformed) value otherwise.
///
/// The macro optionally allows defining a transform for values read from the underlying
/// array. Transforms of the form `|value| { ... }` are infallible (and should produce something
/// that implements `Into<Variant>`), while transforms of the form `|value| -> Option<_> { ... }`
/// are fallible (and should produce `Option<impl Into<Variant>>`); a failed transform will either
/// append `Variant::Null` to the builder or return an error, depending on cast options.
///
/// Also supports optional extra fields that are passed to the constructor and which are available
/// by reference in the value transform. Providing a fallible value transform requires also
/// providing the extra field `options: &'a CastOptions`.
// TODO: If/when the macro_metavar_expr feature stabilizes, the `ignore` meta-function would allow
// us to "use" captured tokens without emitting them:
//
// ```
// $(
//     ${ignore($value)}
//     $(
//         ${ignore($option_ty)}
//         options: &$lifetime CastOptions,
//     )?
// )?
// ```
//
// That, in turn, would allow us to inject the `options` field whenever the user specifies a
// fallible value transform, instead of requiring them to manually define it. This might not be
// worth the trouble, tho, because it makes for some pretty bulky and unwieldy macro expansions.
macro_rules! define_row_builder {
    (
        // Struct header: name, lifetime, and at most one bounded generic parameter
        struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
        // Optional single-predicate where clause
        $( where $where_path:path: $where_bound:path $(,)? )?
        // Optional extra fields, stored in the struct and taken by `new` after the array
        $({ $($field:ident: $field_type:ty),+ $(,)? })?,
        // Downcast from `&dyn Array` to the concrete array type
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        // Optional value transform; the `-> Option<..>` form marks it as fallible
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr)?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $bound )?>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic)?>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new($array_param: &$lifetime dyn Array $(, $( $field: $field_type ),+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                }
            }

            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Macro hygiene: Give any extra fields names the value transform can access.
                    //
                    // The value transform doesn't normally reference cast options, but the macro's
                    // caller still has to declare the field because stable rust has no way to "use"
                    // a captured token without emitting it. So, silence unused variable warnings,
                    // assuming that's the `options` field. Unfortunately, that also silences
                    // legitimate compiler warnings if an infallible value transform fails to use
                    // its first extra field.
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    // Apply the value transform, if any (with name swapping for hygiene)
                    let value = self.array.value(index);
                    $(
                        let $value = value;
                        let value = $value_transform;
                        $(
                            // NOTE: The `?` macro expansion fails without the type annotation.
                            let Some(value): Option<$option_ty> = value else {
                                if self.options.strict {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    // Overflow is encoded as Variant::Null,
                                    // distinct from None indicating a missing value
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
391
// Row builder over a `BooleanArray`. No value transform is given, so each non-null value is
// appended exactly as read from the array.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
396
// Row builder over any `PrimitiveArray<T>` whose native value type converts directly into a
// `Variant` (the `where` bound). No value transform is given, so values are appended as read.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
402
// Row builder over a `Decimal32Array`. The transform is fallible: it passes the raw `i32` value
// and the column's `scale` to `decimal_to_variant_decimal!` to obtain a `VariantDecimal4`. When
// that yields `None`, the generated `append_row` either returns an error (`options.strict`) or
// appends `Variant::Null`.
define_row_builder!(
    struct Decimal32ArrowToVariantBuilder<'a> {
        options: &'a CastOptions,
        scale: i8,
    },
    |array| -> arrow::array::Decimal32Array { array.as_primitive() },
    |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i32, VariantDecimal4) }
);
411
// Row builder over a `Decimal64Array`. The transform is fallible: it passes the raw `i64` value
// and the column's `scale` to `decimal_to_variant_decimal!` to obtain a `VariantDecimal8`. When
// that yields `None`, the generated `append_row` either returns an error (`options.strict`) or
// appends `Variant::Null`.
define_row_builder!(
    struct Decimal64ArrowToVariantBuilder<'a> {
        options: &'a CastOptions,
        scale: i8,
    },
    |array| -> arrow::array::Decimal64Array { array.as_primitive() },
    |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i64, VariantDecimal8) }
);
420
// Row builder over a `Decimal128Array`. The transform is fallible: it passes the raw `i128`
// value and the column's `scale` to `decimal_to_variant_decimal!` to obtain a
// `VariantDecimal16`. When that yields `None`, the generated `append_row` either returns an
// error (`options.strict`) or appends `Variant::Null`.
define_row_builder!(
    struct Decimal128ArrowToVariantBuilder<'a> {
        options: &'a CastOptions,
        scale: i8,
    },
    |array| -> arrow::array::Decimal128Array { array.as_primitive() },
    |value| -> Option<_> { decimal_to_variant_decimal!(value, scale, i128, VariantDecimal16) }
);
429
// Row builder over a `Decimal256Array`. The 256-bit value is first narrowed with `to_i128()` and
// then converted like a 128-bit decimal into a `VariantDecimal16`. The transform yields `None`
// when either step does; the generated `append_row` then returns an error (`options.strict`) or
// appends `Variant::Null`.
define_row_builder!(
    struct Decimal256ArrowToVariantBuilder<'a> {
        options: &'a CastOptions,
        scale: i8,
    },
    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
    |value| -> Option<_> {
        // Decimal256 needs special handling - convert to i128 if possible
        value.to_i128().and_then(|i128_val| {
            decimal_to_variant_decimal!(i128_val, scale, i128, VariantDecimal16)
        })
    }
);
443
// Row builder over a timestamp `PrimitiveArray<T>` of any time unit. `has_time_zone` records
// whether the Arrow type carried a time zone and selects between a UTC and a naive variant
// timestamp. The transform is fallible because `as_datetime` returns an `Option`; on `None` the
// generated `append_row` returns an error (`options.strict`) or appends `Variant::Null`.
define_row_builder!(
    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
        options: &'a CastOptions,
        has_time_zone: bool,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        // Convert using Arrow's temporal conversion functions
        as_datetime::<T>(value).map(|naive_datetime| {
            // Extra fields are exposed to the transform by reference, hence the dereference.
            if *has_time_zone {
                // Has timezone -> DateTime<Utc> -> TimestampMicros/TimestampNanos
                let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
                Variant::from(utc_dt) // Uses From<DateTime<Utc>> for Variant
            } else {
                // No timezone -> NaiveDateTime -> TimestampNtzMicros/TimestampNtzNanos
                Variant::from(naive_datetime) // Uses From<NaiveDateTime> for Variant
            }
        })
    }
);
464
// Row builder over a date `PrimitiveArray<T>` (instantiated for `Date32Type` and `Date64Type`).
// The native value is widened to `i64` (the `where` bound) and converted with `as_date`, which
// returns an `Option`; on `None` the generated `append_row` returns an error (`options.strict`)
// or appends `Variant::Null`.
define_row_builder!(
    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
    where
        i64: From<T::Native>,
    {
        options: &'a CastOptions,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        let date_value = i64::from(value);
        as_date::<T>(date_value)
    }
);
478
// Row builder over a time-of-day `PrimitiveArray<T>` (instantiated for the `Time32*` and
// `Time64*` types). The native value is widened to `i64` (the `where` bound) and converted with
// `as_time`, which returns an `Option`; on `None` the generated `append_row` returns an error
// (`options.strict`) or appends `Variant::Null`.
define_row_builder!(
    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
    where
        i64: From<T::Native>,
    {
        options: &'a CastOptions,
    },
    |array| -> PrimitiveArray<T> { array.as_primitive() },
    |value| -> Option<_> {
        let time_value = i64::from(value);
        as_time::<T>(time_value)
    }
);
492
// Row builder over a `GenericBinaryArray<O>`; `O` is the offset width (`i32` for Binary, `i64`
// for LargeBinary). No value transform is given, so values are appended as read.
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);
497
// Row builder over a `BinaryViewArray`. No value transform is given, so values are appended as
// read.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);
502
// Row builder over a `FixedSizeBinaryArray`. No value transform is given, so values are appended
// as read.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
507
// Row builder over a `GenericStringArray<O>`; `O` is the offset width (`i32` for Utf8, `i64` for
// LargeUtf8). No value transform is given, so values are appended as read.
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);
512
// Row builder over a `StringViewArray`. No value transform is given, so values are appended as
// read.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
517
518/// Null builder that always appends null
519pub(crate) struct NullArrowToVariantBuilder;
520
521impl NullArrowToVariantBuilder {
522    fn append_row(
523        &mut self,
524        builder: &mut impl VariantBuilderExt,
525        _index: usize,
526    ) -> Result<(), ArrowError> {
527        builder.append_null();
528        Ok(())
529    }
530}
531
532/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
533/// and FixedSizeList
534pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
535    list_array: &'a L,
536    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
537}
538
539impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
540    pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
541        let values = array.values();
542        let values_builder =
543            make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
544
545        Ok(Self {
546            list_array: array,
547            values_builder: Box::new(values_builder),
548        })
549    }
550
551    fn append_row(
552        &mut self,
553        builder: &mut impl VariantBuilderExt,
554        index: usize,
555    ) -> Result<(), ArrowError> {
556        if self.list_array.is_null(index) {
557            builder.append_null();
558            return Ok(());
559        }
560
561        let range = self.list_array.element_range(index);
562
563        let mut list_builder = builder.try_new_list()?;
564        for value_index in range {
565            self.values_builder
566                .append_row(&mut list_builder, value_index)?;
567        }
568        list_builder.finish();
569        Ok(())
570    }
571}
572
/// Trait for list-like arrays that can provide element ranges
///
/// It lets `ListArrowToVariantBuilder` handle every list layout uniformly: the builder creates
/// one row builder over `values()` and, for each row, feeds it the indices produced by
/// `element_range()`.
pub(crate) trait ListLikeArray: Array {
    /// Get the values array
    ///
    /// This is the child array that the indices returned by `element_range` refer to.
    fn values(&self) -> &dyn Array;

    /// Get the start and end indices for a list element
    ///
    /// The returned half-open range holds the positions, within `values()`, of the elements that
    /// belong to the list at row `index`.
    fn element_range(&self, index: usize) -> Range<usize>;
}
581
582impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
583    fn values(&self) -> &dyn Array {
584        self.values()
585    }
586
587    fn element_range(&self, index: usize) -> Range<usize> {
588        let offsets = self.offsets();
589        let start = offsets[index].as_usize();
590        let end = offsets[index + 1].as_usize();
591        start..end
592    }
593}
594
595impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
596    fn values(&self) -> &dyn Array {
597        self.values()
598    }
599
600    fn element_range(&self, index: usize) -> Range<usize> {
601        let offsets = self.value_offsets();
602        let sizes = self.value_sizes();
603        let offset = offsets[index].as_usize();
604        let size = sizes[index].as_usize();
605        offset..(offset + size)
606    }
607}
608
609impl ListLikeArray for FixedSizeListArray {
610    fn values(&self) -> &dyn Array {
611        self.values()
612    }
613
614    fn element_range(&self, index: usize) -> Range<usize> {
615        let value_length = self.value_length().as_usize();
616        let offset = index * value_length;
617        offset..(offset + value_length)
618    }
619}
620
621/// Struct builder for StructArray
622pub(crate) struct StructArrowToVariantBuilder<'a> {
623    struct_array: &'a arrow::array::StructArray,
624    field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
625}
626
627impl<'a> StructArrowToVariantBuilder<'a> {
628    pub(crate) fn new(
629        struct_array: &'a arrow::array::StructArray,
630        options: &'a CastOptions,
631    ) -> Result<Self, ArrowError> {
632        let mut field_builders = Vec::new();
633
634        // Create a row builder for each field
635        for (field_name, field_array) in struct_array
636            .column_names()
637            .iter()
638            .zip(struct_array.columns().iter())
639        {
640            let field_builder = make_arrow_to_variant_row_builder(
641                field_array.data_type(),
642                field_array.as_ref(),
643                options,
644            )?;
645            field_builders.push((*field_name, field_builder));
646        }
647
648        Ok(Self {
649            struct_array,
650            field_builders,
651        })
652    }
653
654    fn append_row(
655        &mut self,
656        builder: &mut impl VariantBuilderExt,
657        index: usize,
658    ) -> Result<(), ArrowError> {
659        if self.struct_array.is_null(index) {
660            builder.append_null();
661        } else {
662            // Create object builder for this struct row
663            let mut obj_builder = builder.try_new_object()?;
664
665            // Process each field
666            for (field_name, row_builder) in &mut self.field_builders {
667                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
668                row_builder.append_row(&mut field_builder, index)?;
669            }
670
671            obj_builder.finish();
672        }
673        Ok(())
674    }
675}
676
677/// Map builder for MapArray types
678pub(crate) struct MapArrowToVariantBuilder<'a> {
679    map_array: &'a arrow::array::MapArray,
680    key_strings: arrow::array::StringArray,
681    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
682}
683
684impl<'a> MapArrowToVariantBuilder<'a> {
685    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
686        let map_array = array.as_map();
687
688        // Pre-cast keys to strings once
689        let keys = cast(map_array.keys(), &DataType::Utf8)?;
690        let key_strings = keys.as_string::<i32>().clone();
691
692        // Create recursive builder for values
693        let values = map_array.values();
694        let values_builder =
695            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
696
697        Ok(Self {
698            map_array,
699            key_strings,
700            values_builder: Box::new(values_builder),
701        })
702    }
703
704    fn append_row(
705        &mut self,
706        builder: &mut impl VariantBuilderExt,
707        index: usize,
708    ) -> Result<(), ArrowError> {
709        // Check for NULL map first (via null bitmap)
710        if self.map_array.is_null(index) {
711            builder.append_null();
712            return Ok(());
713        }
714
715        let offsets = self.map_array.offsets();
716        let start = offsets[index].as_usize();
717        let end = offsets[index + 1].as_usize();
718
719        // Create object builder for this map
720        let mut object_builder = builder.try_new_object()?;
721
722        // Add each key-value pair (loop does nothing for empty maps - correct!)
723        for kv_index in start..end {
724            let key = self.key_strings.value(kv_index);
725            let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
726            self.values_builder
727                .append_row(&mut field_builder, kv_index)?;
728        }
729
730        object_builder.finish();
731        Ok(())
732    }
733}
734
735/// Union builder for both sparse and dense union arrays
736///
737/// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders.
738pub(crate) struct UnionArrowToVariantBuilder<'a> {
739    union_array: &'a arrow::array::UnionArray,
740    child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
741}
742
743impl<'a> UnionArrowToVariantBuilder<'a> {
744    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
745        let union_array = array.as_union();
746        let type_ids = union_array.type_ids();
747
748        // Create child builders for each union field
749        let mut child_builders = HashMap::new();
750        for &type_id in type_ids {
751            let child_array = union_array.child(type_id);
752            let child_builder = make_arrow_to_variant_row_builder(
753                child_array.data_type(),
754                child_array.as_ref(),
755                options,
756            )?;
757            child_builders.insert(type_id, Box::new(child_builder));
758        }
759
760        Ok(Self {
761            union_array,
762            child_builders,
763        })
764    }
765
766    fn append_row(
767        &mut self,
768        builder: &mut impl VariantBuilderExt,
769        index: usize,
770    ) -> Result<(), ArrowError> {
771        let type_id = self.union_array.type_id(index);
772        let value_offset = self.union_array.value_offset(index);
773
774        // Delegate to the appropriate child builder, or append null to handle an invalid type_id
775        match self.child_builders.get_mut(&type_id) {
776            Some(child_builder) => child_builder.append_row(builder, value_offset)?,
777            None => builder.append_null(),
778        }
779
780        Ok(())
781    }
782}
783
784/// Dictionary array builder with simple O(1) indexing
785pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
786    keys: &'a dyn Array, // only needed for null checks
787    normalized_keys: Vec<usize>,
788    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
789}
790
791impl<'a> DictionaryArrowToVariantBuilder<'a> {
792    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
793        let dict_array = array.as_any_dictionary();
794        let values = dict_array.values();
795        let values_builder =
796            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
797
798        // WARNING: normalized_keys panics if values is empty
799        let normalized_keys = match values.len() {
800            0 => Vec::new(),
801            _ => dict_array.normalized_keys(),
802        };
803
804        Ok(Self {
805            keys: dict_array.keys(),
806            normalized_keys,
807            values_builder: Box::new(values_builder),
808        })
809    }
810
811    fn append_row(
812        &mut self,
813        builder: &mut impl VariantBuilderExt,
814        index: usize,
815    ) -> Result<(), ArrowError> {
816        if self.keys.is_null(index) {
817            builder.append_null();
818        } else {
819            let normalized_key = self.normalized_keys[index];
820            self.values_builder.append_row(builder, normalized_key)?;
821        }
822        Ok(())
823    }
824}
825
826/// Run-end encoded array builder with efficient sequential access
827pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
828    run_array: &'a arrow::array::RunArray<R>,
829    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
830
831    run_ends: &'a [R::Native],
832    run_number: usize, // Physical index into run_ends and values
833    run_start: usize,  // Logical start index of current run
834}
835
836impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
837    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
838        let Some(run_array) = array.as_run_opt() else {
839            return Err(ArrowError::CastError("Expected RunArray".to_string()));
840        };
841
842        let values = run_array.values();
843        let values_builder =
844            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
845
846        Ok(Self {
847            run_array,
848            values_builder: Box::new(values_builder),
849            run_ends: run_array.run_ends().values(),
850            run_number: 0,
851            run_start: 0,
852        })
853    }
854
855    fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
856        if index >= self.run_start {
857            let Some(run_end) = self.run_ends.get(self.run_number) else {
858                return Err(ArrowError::CastError(format!(
859                    "Index {index} beyond run array"
860                )));
861            };
862            if index < run_end.as_usize() {
863                return Ok(());
864            }
865            if index == run_end.as_usize() {
866                self.run_number += 1;
867                self.run_start = run_end.as_usize();
868                return Ok(());
869            }
870        }
871
872        // Use partition_point for all non-sequential cases
873        let run_number = self
874            .run_ends
875            .partition_point(|&run_end| run_end.as_usize() <= index);
876        if run_number >= self.run_ends.len() {
877            return Err(ArrowError::CastError(format!(
878                "Index {index} beyond run array"
879            )));
880        }
881        self.run_number = run_number;
882        self.run_start = match run_number {
883            0 => 0,
884            _ => self.run_ends[run_number - 1].as_usize(),
885        };
886        Ok(())
887    }
888
889    fn append_row(
890        &mut self,
891        builder: &mut impl VariantBuilderExt,
892        index: usize,
893    ) -> Result<(), ArrowError> {
894        self.set_run_for_index(index)?;
895
896        // Handle null values
897        if self.run_array.values().is_null(self.run_number) {
898            builder.append_null();
899            return Ok(());
900        }
901
902        // Re-encode the value
903        self.values_builder.append_row(builder, self.run_number)?;
904
905        Ok(())
906    }
907}
908
909#[cfg(test)]
910mod tests {
911    use super::*;
912    use crate::{VariantArray, VariantArrayBuilder};
913    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
914    use std::sync::Arc;
915
916    /// Builds a VariantArray from an Arrow array using the row builder.
917    fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
918        execute_row_builder_test_with_options(array, CastOptions::default())
919    }
920
921    /// Variant of `execute_row_builder_test` that allows specifying options
922    fn execute_row_builder_test_with_options(
923        array: &dyn Array,
924        options: CastOptions,
925    ) -> VariantArray {
926        let mut row_builder =
927            make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
928
929        let mut array_builder = VariantArrayBuilder::new(array.len());
930
931        // The repetitive loop that appears in every test
932        for i in 0..array.len() {
933            row_builder.append_row(&mut array_builder, i).unwrap();
934        }
935
936        let variant_array = array_builder.build();
937        assert_eq!(variant_array.len(), array.len());
938        variant_array
939    }
940
941    /// Generic helper function to test row builders with basic assertion patterns.
942    /// Uses execute_row_builder_test and adds simple value comparison assertions.
943    fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
944        test_row_builder_basic_with_options(array, expected_values, CastOptions::default());
945    }
946
947    /// Variant of `test_row_builder_basic` that allows specifying options
948    fn test_row_builder_basic_with_options(
949        array: &dyn Array,
950        expected_values: Vec<Option<Variant>>,
951        options: CastOptions,
952    ) {
953        let variant_array = execute_row_builder_test_with_options(array, options);
954
955        // The repetitive assertion pattern
956        for (i, expected) in expected_values.iter().enumerate() {
957            match expected {
958                Some(variant) => {
959                    assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
960                }
961                None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
962            }
963        }
964    }
965
966    #[test]
967    fn test_primitive_row_builder() {
968        let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
969        test_row_builder_basic(
970            &int_array,
971            vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
972        );
973    }
974
975    #[test]
976    fn test_string_row_builder() {
977        let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
978        test_row_builder_basic(
979            &string_array,
980            vec![
981                Some(Variant::from("hello")),
982                None,
983                Some(Variant::from("world")),
984            ],
985        );
986    }
987
988    #[test]
989    fn test_boolean_row_builder() {
990        let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
991        test_row_builder_basic(
992            &bool_array,
993            vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
994        );
995    }
996
997    #[test]
998    fn test_struct_row_builder() {
999        use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
1000        use arrow_schema::{DataType, Field};
1001        use std::sync::Arc;
1002
1003        // Create a struct array with int and string fields
1004        let int_field = Field::new("id", DataType::Int32, true);
1005        let string_field = Field::new("name", DataType::Utf8, true);
1006
1007        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1008        let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
1009
1010        let struct_array = StructArray::try_new(
1011            vec![int_field, string_field].into(),
1012            vec![
1013                Arc::new(int_array) as ArrayRef,
1014                Arc::new(string_array) as ArrayRef,
1015            ],
1016            None,
1017        )
1018        .unwrap();
1019
1020        let variant_array = execute_row_builder_test(&struct_array);
1021
1022        // Check first row - should have both fields
1023        let first_variant = variant_array.value(0);
1024        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1025        assert_eq!(
1026            first_variant.get_object_field("name"),
1027            Some(Variant::from("Alice"))
1028        );
1029
1030        // Check second row - should have name field but not id (null field omitted)
1031        let second_variant = variant_array.value(1);
1032        assert_eq!(second_variant.get_object_field("id"), None); // null field omitted
1033        assert_eq!(
1034            second_variant.get_object_field("name"),
1035            Some(Variant::from("Bob"))
1036        );
1037
1038        // Check third row - should have id field but not name (null field omitted)
1039        let third_variant = variant_array.value(2);
1040        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
1041        assert_eq!(third_variant.get_object_field("name"), None); // null field omitted
1042    }
1043
1044    #[test]
1045    fn test_run_end_encoded_row_builder() {
1046        use arrow::array::{Int32Array, RunArray};
1047        use arrow::datatypes::Int32Type;
1048
1049        // Create a run-end encoded array: [A, A, B, B, B, C]
1050        // run_ends: [2, 5, 6]
1051        // values: ["A", "B", "C"]
1052        let values = StringArray::from(vec!["A", "B", "C"]);
1053        let run_ends = Int32Array::from(vec![2, 5, 6]);
1054        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1055
1056        let variant_array = execute_row_builder_test(&run_array);
1057
1058        // Verify the values
1059        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1060        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1061        assert_eq!(variant_array.value(2), Variant::from("B")); // Run 1
1062        assert_eq!(variant_array.value(3), Variant::from("B")); // Run 1
1063        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 1
1064        assert_eq!(variant_array.value(5), Variant::from("C")); // Run 2
1065    }
1066
1067    #[test]
1068    fn test_run_end_encoded_random_access() {
1069        use arrow::array::{Int32Array, RunArray};
1070        use arrow::datatypes::Int32Type;
1071
1072        // Create a run-end encoded array: [A, A, B, B, B, C]
1073        let values = StringArray::from(vec!["A", "B", "C"]);
1074        let run_ends = Int32Array::from(vec![2, 5, 6]);
1075        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1076
1077        let options = CastOptions::default();
1078        let mut row_builder =
1079            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1080
1081        // Test random access pattern (backward jumps, forward jumps)
1082        let access_pattern = [0, 5, 2, 4, 1, 3]; // Mix of all cases
1083        let expected_values = ["A", "C", "B", "B", "A", "B"];
1084
1085        for (i, &index) in access_pattern.iter().enumerate() {
1086            let mut array_builder = VariantArrayBuilder::new(1);
1087            row_builder.append_row(&mut array_builder, index).unwrap();
1088            let variant_array = array_builder.build();
1089            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1090        }
1091    }
1092
1093    #[test]
1094    fn test_run_end_encoded_with_nulls() {
1095        use arrow::array::{Int32Array, RunArray};
1096        use arrow::datatypes::Int32Type;
1097
1098        // Create a run-end encoded array with null values: [A, A, null, null, B]
1099        let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1100        let run_ends = Int32Array::from(vec![2, 4, 5]);
1101        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1102
1103        let options = CastOptions::default();
1104        let mut row_builder =
1105            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1106        let mut array_builder = VariantArrayBuilder::new(5);
1107
1108        // Test sequential access
1109        for i in 0..5 {
1110            row_builder.append_row(&mut array_builder, i).unwrap();
1111        }
1112
1113        let variant_array = array_builder.build();
1114        assert_eq!(variant_array.len(), 5);
1115
1116        // Verify the values
1117        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1118        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1119        assert!(variant_array.is_null(2)); // Run 1 (null)
1120        assert!(variant_array.is_null(3)); // Run 1 (null)
1121        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 2
1122    }
1123
1124    #[test]
1125    fn test_dictionary_row_builder() {
1126        use arrow::array::{DictionaryArray, Int32Array};
1127        use arrow::datatypes::Int32Type;
1128
1129        // Create a dictionary array: keys=[0, 1, 0, 2, 1], values=["apple", "banana", "cherry"]
1130        let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1131        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1132        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1133
1134        let variant_array = execute_row_builder_test(&dict_array);
1135
1136        // Verify the values match the dictionary lookup
1137        assert_eq!(variant_array.value(0), Variant::from("apple")); // keys[0] = 0 -> values[0] = "apple"
1138        assert_eq!(variant_array.value(1), Variant::from("banana")); // keys[1] = 1 -> values[1] = "banana"
1139        assert_eq!(variant_array.value(2), Variant::from("apple")); // keys[2] = 0 -> values[0] = "apple"
1140        assert_eq!(variant_array.value(3), Variant::from("cherry")); // keys[3] = 2 -> values[2] = "cherry"
1141        assert_eq!(variant_array.value(4), Variant::from("banana")); // keys[4] = 1 -> values[1] = "banana"
1142    }
1143
1144    #[test]
1145    fn test_dictionary_with_nulls() {
1146        use arrow::array::{DictionaryArray, Int32Array};
1147        use arrow::datatypes::Int32Type;
1148
1149        // Create a dictionary array with null keys: keys=[0, null, 1, null, 2], values=["x", "y", "z"]
1150        let values = StringArray::from(vec!["x", "y", "z"]);
1151        let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1152        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1153
1154        let options = CastOptions::default();
1155        let mut row_builder =
1156            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1157                .unwrap();
1158        let mut array_builder = VariantArrayBuilder::new(5);
1159
1160        // Test sequential access
1161        for i in 0..5 {
1162            row_builder.append_row(&mut array_builder, i).unwrap();
1163        }
1164
1165        let variant_array = array_builder.build();
1166        assert_eq!(variant_array.len(), 5);
1167
1168        // Verify the values and nulls
1169        assert_eq!(variant_array.value(0), Variant::from("x")); // keys[0] = 0 -> values[0] = "x"
1170        assert!(variant_array.is_null(1)); // keys[1] = null
1171        assert_eq!(variant_array.value(2), Variant::from("y")); // keys[2] = 1 -> values[1] = "y"
1172        assert!(variant_array.is_null(3)); // keys[3] = null
1173        assert_eq!(variant_array.value(4), Variant::from("z")); // keys[4] = 2 -> values[2] = "z"
1174    }
1175
1176    #[test]
1177    fn test_dictionary_random_access() {
1178        use arrow::array::{DictionaryArray, Int32Array};
1179        use arrow::datatypes::Int32Type;
1180
1181        // Create a dictionary array: keys=[0, 1, 2, 0, 1, 2], values=["red", "green", "blue"]
1182        let values = StringArray::from(vec!["red", "green", "blue"]);
1183        let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1184        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1185
1186        let options = CastOptions::default();
1187        let mut row_builder =
1188            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1189                .unwrap();
1190
1191        // Test random access pattern
1192        let access_pattern = [5, 0, 3, 1, 4, 2]; // Random order
1193        let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1194
1195        for (i, &index) in access_pattern.iter().enumerate() {
1196            let mut array_builder = VariantArrayBuilder::new(1);
1197            row_builder.append_row(&mut array_builder, index).unwrap();
1198            let variant_array = array_builder.build();
1199            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1200        }
1201    }
1202
1203    #[test]
1204    fn test_nested_dictionary() {
1205        use arrow::array::{DictionaryArray, Int32Array, StructArray};
1206        use arrow::datatypes::{Field, Int32Type};
1207
1208        // Create a dictionary with struct values
1209        let id_array = Int32Array::from(vec![1, 2, 3]);
1210        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1211        let struct_array = StructArray::from(vec![
1212            (
1213                Arc::new(Field::new("id", DataType::Int32, false)),
1214                Arc::new(id_array) as ArrayRef,
1215            ),
1216            (
1217                Arc::new(Field::new("name", DataType::Utf8, false)),
1218                Arc::new(name_array) as ArrayRef,
1219            ),
1220        ]);
1221
1222        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1223        let dict_array =
1224            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1225
1226        let options = CastOptions::default();
1227        let mut row_builder =
1228            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1229                .unwrap();
1230        let mut array_builder = VariantArrayBuilder::new(5);
1231
1232        // Test sequential access
1233        for i in 0..5 {
1234            row_builder.append_row(&mut array_builder, i).unwrap();
1235        }
1236
1237        let variant_array = array_builder.build();
1238        assert_eq!(variant_array.len(), 5);
1239
1240        // Verify the nested struct values
1241        let first_variant = variant_array.value(0);
1242        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1243        assert_eq!(
1244            first_variant.get_object_field("name"),
1245            Some(Variant::from("Alice"))
1246        );
1247
1248        let second_variant = variant_array.value(1);
1249        assert_eq!(
1250            second_variant.get_object_field("id"),
1251            Some(Variant::from(2))
1252        );
1253        assert_eq!(
1254            second_variant.get_object_field("name"),
1255            Some(Variant::from("Bob"))
1256        );
1257
1258        // Test that repeated keys give same values
1259        let third_variant = variant_array.value(2);
1260        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1261        assert_eq!(
1262            third_variant.get_object_field("name"),
1263            Some(Variant::from("Alice"))
1264        );
1265    }
1266
1267    #[test]
1268    fn test_list_row_builder() {
1269        use arrow::array::ListArray;
1270
1271        // Create a list array: [[1, 2], [3, 4, 5], null, []]
1272        let data = vec![
1273            Some(vec![Some(1), Some(2)]),
1274            Some(vec![Some(3), Some(4), Some(5)]),
1275            None,
1276            Some(vec![]),
1277        ];
1278        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1279
1280        let variant_array = execute_row_builder_test(&list_array);
1281
1282        // Row 0: [1, 2]
1283        let row0 = variant_array.value(0);
1284        let list0 = row0.as_list().unwrap();
1285        assert_eq!(list0.len(), 2);
1286        assert_eq!(list0.get(0), Some(Variant::from(1)));
1287        assert_eq!(list0.get(1), Some(Variant::from(2)));
1288
1289        // Row 1: [3, 4, 5]
1290        let row1 = variant_array.value(1);
1291        let list1 = row1.as_list().unwrap();
1292        assert_eq!(list1.len(), 3);
1293        assert_eq!(list1.get(0), Some(Variant::from(3)));
1294        assert_eq!(list1.get(1), Some(Variant::from(4)));
1295        assert_eq!(list1.get(2), Some(Variant::from(5)));
1296
1297        // Row 2: null
1298        assert!(variant_array.is_null(2));
1299
1300        // Row 3: []
1301        let row3 = variant_array.value(3);
1302        let list3 = row3.as_list().unwrap();
1303        assert_eq!(list3.len(), 0);
1304    }
1305
1306    #[test]
1307    fn test_sliced_list_row_builder() {
1308        use arrow::array::ListArray;
1309
1310        // Create a list array: [[1, 2], [3, 4, 5], [6]]
1311        let data = vec![
1312            Some(vec![Some(1), Some(2)]),
1313            Some(vec![Some(3), Some(4), Some(5)]),
1314            Some(vec![Some(6)]),
1315        ];
1316        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1317
1318        // Slice to get just the middle element: [[3, 4, 5]]
1319        let sliced_array = list_array.slice(1, 1);
1320
1321        let options = CastOptions::default();
1322        let mut row_builder =
1323            make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1324                .unwrap();
1325        let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1326
1327        // Test the single row
1328        row_builder
1329            .append_row(&mut variant_array_builder, 0)
1330            .unwrap();
1331        let variant_array = variant_array_builder.build();
1332
1333        // Verify result
1334        assert_eq!(variant_array.len(), 1);
1335
1336        // Row 0: [3, 4, 5]
1337        let row0 = variant_array.value(0);
1338        let list0 = row0.as_list().unwrap();
1339        assert_eq!(list0.len(), 3);
1340        assert_eq!(list0.get(0), Some(Variant::from(3)));
1341        assert_eq!(list0.get(1), Some(Variant::from(4)));
1342        assert_eq!(list0.get(2), Some(Variant::from(5)));
1343    }
1344
1345    #[test]
1346    fn test_nested_list_row_builder() {
1347        use arrow::array::ListArray;
1348        use arrow::datatypes::Field;
1349
1350        // Build the nested structure manually
1351        let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1352        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1353
1354        let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1355        let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1356
1357        let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1358        let outer_list = ListArray::new(
1359            inner_list_field,
1360            outer_offsets,
1361            Arc::new(values_list),
1362            Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1363        );
1364
1365        let options = CastOptions::default();
1366        let mut row_builder =
1367            make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1368                .unwrap();
1369        let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1370
1371        for i in 0..outer_list.len() {
1372            row_builder
1373                .append_row(&mut variant_array_builder, i)
1374                .unwrap();
1375        }
1376
1377        let variant_array = variant_array_builder.build();
1378
1379        // Verify results
1380        assert_eq!(variant_array.len(), 2);
1381
1382        // Row 0: [[1, 2], [3]]
1383        let row0 = variant_array.value(0);
1384        let outer_list0 = row0.as_list().unwrap();
1385        assert_eq!(outer_list0.len(), 2);
1386
1387        let inner_list0_0 = outer_list0.get(0).unwrap();
1388        let inner_list0_0 = inner_list0_0.as_list().unwrap();
1389        assert_eq!(inner_list0_0.len(), 2);
1390        assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1391        assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1392
1393        let inner_list0_1 = outer_list0.get(1).unwrap();
1394        let inner_list0_1 = inner_list0_1.as_list().unwrap();
1395        assert_eq!(inner_list0_1.len(), 1);
1396        assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1397
1398        // Row 1: null
1399        assert!(variant_array.is_null(1));
1400    }
1401
1402    #[test]
1403    fn test_map_row_builder() {
1404        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1405        use arrow::buffer::{NullBuffer, OffsetBuffer};
1406        use arrow::datatypes::{DataType, Field, Fields};
1407        use std::sync::Arc;
1408
1409        // Create the entries struct array (key-value pairs)
1410        let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1411        let values = Int32Array::from(vec![1, 2, 3]);
1412        let entries_fields = Fields::from(vec![
1413            Field::new("key", DataType::Utf8, false),
1414            Field::new("value", DataType::Int32, true),
1415        ]);
1416        let entries = StructArray::new(
1417            entries_fields.clone(),
1418            vec![Arc::new(keys), Arc::new(values)],
1419            None, // No nulls in the entries themselves
1420        );
1421
1422        // Create offsets for 4 maps: [0..1], [1..1], [1..1], [1..3]
1423        // Map 0: {"key1": 1}    (1 entry)
1424        // Map 1: {}             (0 entries - empty)
1425        // Map 2: null           (0 entries but NULL via null buffer)
1426        // Map 3: {"key2": 2, "key3": 3}  (2 entries)
1427        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1428
1429        // Create null buffer - map at index 2 is NULL
1430        let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1431
1432        // Create the map field
1433        let map_field = Arc::new(Field::new(
1434            "entries",
1435            DataType::Struct(entries_fields),
1436            false, // Keys are non-nullable
1437        ));
1438
1439        // Create MapArray using try_new
1440        let map_array = MapArray::try_new(
1441            map_field,
1442            offsets,
1443            entries,
1444            null_buffer,
1445            false, // not ordered
1446        )
1447        .unwrap();
1448
1449        let variant_array = execute_row_builder_test(&map_array);
1450
1451        // Map 0: {"key1": 1}
1452        let map0 = variant_array.value(0);
1453        let obj0 = map0.as_object().unwrap();
1454        assert_eq!(obj0.len(), 1);
1455        assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1456
1457        // Map 1: {} (empty object, not null)
1458        let map1 = variant_array.value(1);
1459        let obj1 = map1.as_object().unwrap();
1460        assert_eq!(obj1.len(), 0); // Empty object
1461
1462        // Map 2: null (actual NULL)
1463        assert!(variant_array.is_null(2));
1464
1465        // Map 3: {"key2": 2, "key3": 3}
1466        let map3 = variant_array.value(3);
1467        let obj3 = map3.as_object().unwrap();
1468        assert_eq!(obj3.len(), 2);
1469        assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1470        assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1471    }
1472
1473    #[test]
1474    fn test_union_sparse_row_builder() {
1475        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1476        use arrow::buffer::ScalarBuffer;
1477        use arrow::datatypes::{DataType, Field, UnionFields};
1478        use std::sync::Arc;
1479
1480        // Create a sparse union array with mixed types (int, float, string)
1481        let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1482        let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1483        let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1484        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1485
1486        let union_fields = UnionFields::new(
1487            vec![0, 1, 2],
1488            vec![
1489                Field::new("int_field", DataType::Int32, false),
1490                Field::new("float_field", DataType::Float64, false),
1491                Field::new("string_field", DataType::Utf8, false),
1492            ],
1493        );
1494
1495        let children: Vec<Arc<dyn Array>> = vec![
1496            Arc::new(int_array),
1497            Arc::new(float_array),
1498            Arc::new(string_array),
1499        ];
1500
1501        let union_array = UnionArray::try_new(
1502            union_fields,
1503            type_ids,
1504            None, // Sparse union
1505            children,
1506        )
1507        .unwrap();
1508
1509        let variant_array = execute_row_builder_test(&union_array);
1510        assert_eq!(variant_array.value(0), Variant::Int32(1));
1511        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1512        assert_eq!(variant_array.value(2), Variant::from("hello"));
1513        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1514        assert_eq!(variant_array.value(4), Variant::Int32(34));
1515        assert!(variant_array.is_null(5));
1516    }
1517
1518    #[test]
1519    fn test_union_dense_row_builder() {
1520        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1521        use arrow::buffer::ScalarBuffer;
1522        use arrow::datatypes::{DataType, Field, UnionFields};
1523        use std::sync::Arc;
1524
1525        // Create a dense union array with mixed types (int, float, string)
1526        let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1527        let float_array = Float64Array::from(vec![3.2, 32.5]);
1528        let string_array = StringArray::from(vec!["hello"]);
1529        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1530        let offsets = [0, 0, 0, 1, 1, 2]
1531            .into_iter()
1532            .collect::<ScalarBuffer<i32>>();
1533
1534        let union_fields = UnionFields::new(
1535            vec![0, 1, 2],
1536            vec![
1537                Field::new("int_field", DataType::Int32, false),
1538                Field::new("float_field", DataType::Float64, false),
1539                Field::new("string_field", DataType::Utf8, false),
1540            ],
1541        );
1542
1543        let children: Vec<Arc<dyn Array>> = vec![
1544            Arc::new(int_array),
1545            Arc::new(float_array),
1546            Arc::new(string_array),
1547        ];
1548
1549        let union_array = UnionArray::try_new(
1550            union_fields,
1551            type_ids,
1552            Some(offsets), // Dense union
1553            children,
1554        )
1555        .unwrap();
1556
1557        // Test the row builder
1558        let options = CastOptions::default();
1559        let mut row_builder =
1560            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1561                .unwrap();
1562
1563        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1564        for i in 0..union_array.len() {
1565            row_builder.append_row(&mut variant_builder, i).unwrap();
1566        }
1567        let variant_array = variant_builder.build();
1568
1569        assert_eq!(variant_array.len(), 6);
1570        assert_eq!(variant_array.value(0), Variant::Int32(1));
1571        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1572        assert_eq!(variant_array.value(2), Variant::from("hello"));
1573        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1574        assert_eq!(variant_array.value(4), Variant::Int32(34));
1575        assert!(variant_array.is_null(5));
1576    }
1577
1578    #[test]
1579    fn test_union_sparse_type_ids_row_builder() {
1580        use arrow::array::{Int32Array, StringArray, UnionArray};
1581        use arrow::buffer::ScalarBuffer;
1582        use arrow::datatypes::{DataType, Field, UnionFields};
1583        use std::sync::Arc;
1584
1585        // Create a sparse union with non-contiguous type IDs (1, 3)
1586        let int_array = Int32Array::from(vec![Some(42), None]);
1587        let string_array = StringArray::from(vec![None, Some("test")]);
1588        let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1589
1590        let union_fields = UnionFields::new(
1591            vec![1, 3], // Non-contiguous type IDs
1592            vec![
1593                Field::new("int_field", DataType::Int32, false),
1594                Field::new("string_field", DataType::Utf8, false),
1595            ],
1596        );
1597
1598        let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1599
1600        let union_array = UnionArray::try_new(
1601            union_fields,
1602            type_ids,
1603            None, // Sparse union
1604            children,
1605        )
1606        .unwrap();
1607
1608        // Test the row builder
1609        let options = CastOptions::default();
1610        let mut row_builder =
1611            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1612                .unwrap();
1613
1614        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1615        for i in 0..union_array.len() {
1616            row_builder.append_row(&mut variant_builder, i).unwrap();
1617        }
1618        let variant_array = variant_builder.build();
1619
1620        // Verify results
1621        assert_eq!(variant_array.len(), 2);
1622
1623        // Row 0: int 42 (type_id = 1)
1624        assert_eq!(variant_array.value(0), Variant::Int32(42));
1625
1626        // Row 1: string "test" (type_id = 3)
1627        assert_eq!(variant_array.value(1), Variant::from("test"));
1628    }
1629
1630    #[test]
1631    fn test_decimal32_row_builder() {
1632        use arrow::array::Decimal32Array;
1633        use parquet_variant::VariantDecimal4;
1634
1635        // Test Decimal32Array with scale 2 (e.g., for currency: 12.34)
1636        let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1637            .with_precision_and_scale(9, 2)
1638            .unwrap();
1639
1640        test_row_builder_basic(
1641            &decimal_array,
1642            vec![
1643                Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1644                None,
1645                Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1646            ],
1647        );
1648    }
1649
1650    #[test]
1651    fn test_decimal128_row_builder() {
1652        use arrow::array::Decimal128Array;
1653        use parquet_variant::VariantDecimal16;
1654
1655        // Test Decimal128Array with negative scale (multiply by 10^|scale|)
1656        let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1657            .with_precision_and_scale(10, -2)
1658            .unwrap();
1659
1660        test_row_builder_basic(
1661            &decimal_array,
1662            vec![
1663                Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1664                None,
1665                Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1666            ],
1667        );
1668    }
1669
1670    #[test]
1671    fn test_decimal256_overflow_row_builder() {
1672        use arrow::array::Decimal256Array;
1673        use arrow::datatypes::i256;
1674
1675        // Test Decimal256Array with a value that overflows i128
1676        let large_value = i256::from_i128(i128::MAX) + i256::from(1); // Overflows i128
1677        let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1678            .with_precision_and_scale(76, 3)
1679            .unwrap();
1680
1681        test_row_builder_basic_with_options(
1682            &decimal_array,
1683            vec![
1684                Some(Variant::Null), // Overflow value becomes Variant::Null
1685                Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1686            ],
1687            CastOptions { strict: false },
1688        );
1689    }
1690
1691    #[test]
1692    fn test_binary_row_builder() {
1693        use arrow::array::BinaryArray;
1694
1695        let binary_data = vec![
1696            Some(b"hello".as_slice()),
1697            None,
1698            Some(b"\x00\x01\x02\xFF".as_slice()),
1699            Some(b"".as_slice()), // Empty binary
1700        ];
1701        let binary_array = BinaryArray::from(binary_data);
1702
1703        test_row_builder_basic(
1704            &binary_array,
1705            vec![
1706                Some(Variant::from(b"hello".as_slice())),
1707                None,
1708                Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1709                Some(Variant::from([].as_slice())),
1710            ],
1711        );
1712    }
1713
1714    #[test]
1715    fn test_binary_view_row_builder() {
1716        use arrow::array::BinaryViewArray;
1717
1718        let binary_data = vec![
1719            Some(b"short".as_slice()),
1720            None,
1721            Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1722        ];
1723        let binary_view_array = BinaryViewArray::from(binary_data);
1724
1725        test_row_builder_basic(
1726            &binary_view_array,
1727            vec![
1728                Some(Variant::from(b"short".as_slice())),
1729                None,
1730                Some(Variant::from(
1731                    b"this is a longer binary view that exceeds inline storage".as_slice(),
1732                )),
1733            ],
1734        );
1735    }
1736
1737    #[test]
1738    fn test_fixed_size_binary_row_builder() {
1739        use arrow::array::FixedSizeBinaryArray;
1740
1741        let binary_data = vec![
1742            Some([0x01, 0x02, 0x03, 0x04]),
1743            None,
1744            Some([0xFF, 0xFE, 0xFD, 0xFC]),
1745        ];
1746        let fixed_binary_array =
1747            FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1748                .unwrap();
1749
1750        test_row_builder_basic(
1751            &fixed_binary_array,
1752            vec![
1753                Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1754                None,
1755                Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1756            ],
1757        );
1758    }
1759
1760    #[test]
1761    fn test_utf8_view_row_builder() {
1762        use arrow::array::StringViewArray;
1763
1764        let string_data = vec![
1765            Some("short"),
1766            None,
1767            Some("this is a much longer string that will be stored out-of-line in the buffer"),
1768        ];
1769        let string_view_array = StringViewArray::from(string_data);
1770
1771        test_row_builder_basic(
1772            &string_view_array,
1773            vec![
1774                Some(Variant::from("short")),
1775                None,
1776                Some(Variant::from(
1777                    "this is a much longer string that will be stored out-of-line in the buffer",
1778                )),
1779            ],
1780        );
1781    }
1782
1783    #[test]
1784    fn test_timestamp_second_row_builder() {
1785        use arrow::array::TimestampSecondArray;
1786
1787        let timestamp_data = vec![
1788            Some(1609459200), // 2021-01-01 00:00:00 UTC
1789            None,
1790            Some(1640995200), // 2022-01-01 00:00:00 UTC
1791        ];
1792        let timestamp_array = TimestampSecondArray::from(timestamp_data);
1793
1794        let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1795        let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1796
1797        test_row_builder_basic(
1798            &timestamp_array,
1799            vec![
1800                Some(Variant::from(expected_naive1)),
1801                None,
1802                Some(Variant::from(expected_naive2)),
1803            ],
1804        );
1805    }
1806
1807    #[test]
1808    fn test_timestamp_with_timezone_row_builder() {
1809        use arrow::array::TimestampMicrosecondArray;
1810        use chrono::DateTime;
1811
1812        let timestamp_data = vec![
1813            Some(1609459200000000), // 2021-01-01 00:00:00 UTC (in microseconds)
1814            None,
1815            Some(1640995200000000), // 2022-01-01 00:00:00 UTC (in microseconds)
1816        ];
1817        let timezone = "UTC".to_string();
1818        let timestamp_array =
1819            TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1820
1821        let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1822        let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1823
1824        test_row_builder_basic(
1825            &timestamp_array,
1826            vec![
1827                Some(Variant::from(expected_utc1)),
1828                None,
1829                Some(Variant::from(expected_utc2)),
1830            ],
1831        );
1832    }
1833
1834    #[test]
1835    fn test_timestamp_nanosecond_precision_row_builder() {
1836        use arrow::array::TimestampNanosecondArray;
1837
1838        let timestamp_data = vec![
1839            Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
1840            None,
1841            Some(1609459200000000000), // 2021-01-01 00:00:00.000000000 UTC (no fractional seconds)
1842        ];
1843        let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1844
1845        let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1846            .unwrap()
1847            .naive_utc();
1848        let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1849
1850        test_row_builder_basic(
1851            &timestamp_array,
1852            vec![
1853                Some(Variant::from(expected_with_nanos)),
1854                None,
1855                Some(Variant::from(expected_no_nanos)),
1856            ],
1857        );
1858    }
1859
1860    #[test]
1861    fn test_timestamp_millisecond_row_builder() {
1862        use arrow::array::TimestampMillisecondArray;
1863
1864        let timestamp_data = vec![
1865            Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
1866            None,
1867            Some(1609459200000), // 2021-01-01 00:00:00.000 UTC
1868        ];
1869        let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1870
1871        let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1872            .unwrap()
1873            .naive_utc();
1874        let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1875
1876        test_row_builder_basic(
1877            &timestamp_array,
1878            vec![
1879                Some(Variant::from(expected_with_millis)),
1880                None,
1881                Some(Variant::from(expected_no_millis)),
1882            ],
1883        );
1884    }
1885
1886    #[test]
1887    fn test_date32_row_builder() {
1888        use arrow::array::Date32Array;
1889        use chrono::NaiveDate;
1890
1891        let date_data = vec![
1892            Some(0), // 1970-01-01
1893            None,
1894            Some(19723),   // 2024-01-01 (days since epoch)
1895            Some(-719162), // 0001-01-01 (near minimum)
1896        ];
1897        let date_array = Date32Array::from(date_data);
1898
1899        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1900        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1901        let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1902
1903        test_row_builder_basic(
1904            &date_array,
1905            vec![
1906                Some(Variant::from(expected_epoch)),
1907                None,
1908                Some(Variant::from(expected_2024)),
1909                Some(Variant::from(expected_min)),
1910            ],
1911        );
1912    }
1913
1914    #[test]
1915    fn test_date64_row_builder() {
1916        use arrow::array::Date64Array;
1917        use chrono::NaiveDate;
1918
1919        // Test Date64Array with various dates (milliseconds since epoch)
1920        let date_data = vec![
1921            Some(0), // 1970-01-01
1922            None,
1923            Some(1704067200000), // 2024-01-01 (milliseconds since epoch)
1924            Some(86400000),      // 1970-01-02
1925        ];
1926        let date_array = Date64Array::from(date_data);
1927
1928        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1929        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1930        let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1931
1932        test_row_builder_basic(
1933            &date_array,
1934            vec![
1935                Some(Variant::from(expected_epoch)),
1936                None,
1937                Some(Variant::from(expected_2024)),
1938                Some(Variant::from(expected_next_day)),
1939            ],
1940        );
1941    }
1942
1943    #[test]
1944    fn test_time32_second_row_builder() {
1945        use arrow::array::Time32SecondArray;
1946        use chrono::NaiveTime;
1947
1948        // Test Time32SecondArray with various times (seconds since midnight)
1949        let time_data = vec![
1950            Some(0), // 00:00:00
1951            None,
1952            Some(3661),  // 01:01:01
1953            Some(86399), // 23:59:59
1954        ];
1955        let time_array = Time32SecondArray::from(time_data);
1956
1957        let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1958        let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1959        let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1960
1961        test_row_builder_basic(
1962            &time_array,
1963            vec![
1964                Some(Variant::from(expected_midnight)),
1965                None,
1966                Some(Variant::from(expected_time)),
1967                Some(Variant::from(expected_last)),
1968            ],
1969        );
1970    }
1971
1972    #[test]
1973    fn test_time32_millisecond_row_builder() {
1974        use arrow::array::Time32MillisecondArray;
1975        use chrono::NaiveTime;
1976
1977        // Test Time32MillisecondArray with various times (milliseconds since midnight)
1978        let time_data = vec![
1979            Some(0), // 00:00:00.000
1980            None,
1981            Some(3661123),  // 01:01:01.123
1982            Some(86399999), // 23:59:59.999
1983        ];
1984        let time_array = Time32MillisecondArray::from(time_data);
1985
1986        let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
1987        let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
1988        let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
1989
1990        test_row_builder_basic(
1991            &time_array,
1992            vec![
1993                Some(Variant::from(expected_midnight)),
1994                None,
1995                Some(Variant::from(expected_time)),
1996                Some(Variant::from(expected_last)),
1997            ],
1998        );
1999    }
2000
2001    #[test]
2002    fn test_time64_microsecond_row_builder() {
2003        use arrow::array::Time64MicrosecondArray;
2004        use chrono::NaiveTime;
2005
2006        // Test Time64MicrosecondArray with various times (microseconds since midnight)
2007        let time_data = vec![
2008            Some(0), // 00:00:00.000000
2009            None,
2010            Some(3661123456),  // 01:01:01.123456
2011            Some(86399999999), // 23:59:59.999999
2012        ];
2013        let time_array = Time64MicrosecondArray::from(time_data);
2014
2015        let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
2016        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2017        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2018
2019        test_row_builder_basic(
2020            &time_array,
2021            vec![
2022                Some(Variant::from(expected_midnight)),
2023                None,
2024                Some(Variant::from(expected_time)),
2025                Some(Variant::from(expected_last)),
2026            ],
2027        );
2028    }
2029
2030    #[test]
2031    fn test_time64_nanosecond_row_builder() {
2032        use arrow::array::Time64NanosecondArray;
2033        use chrono::NaiveTime;
2034
2035        // Test Time64NanosecondArray with various times (nanoseconds since midnight)
2036        let time_data = vec![
2037            Some(0), // 00:00:00.000000000
2038            None,
2039            Some(3661123456789),  // 01:01:01.123456789
2040            Some(86399999999999), // 23:59:59.999999999
2041        ];
2042        let time_array = Time64NanosecondArray::from(time_data);
2043
2044        let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2045        // Nanoseconds are truncated to microsecond precision in Variant
2046        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2047        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2048
2049        test_row_builder_basic(
2050            &time_array,
2051            vec![
2052                Some(Variant::from(expected_midnight)),
2053                None,
2054                Some(Variant::from(expected_time)),
2055                Some(Variant::from(expected_last)),
2056            ],
2057        );
2058    }
2059}