parquet_variant_compute/
arrow_to_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::type_conversion::CastOptions;
19use arrow::array::{
20    Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
21    GenericStringArray, OffsetSizeTrait, PrimitiveArray,
22};
23use arrow::compute::kernels::cast;
24use arrow::datatypes::{
25    self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
26    DecimalType, RunEndIndexType,
27};
28use arrow::temporal_conversions::{as_date, as_datetime, as_time};
29use arrow_schema::{ArrowError, DataType, TimeUnit};
30use chrono::{DateTime, TimeZone, Utc};
31use parquet_variant::{
32    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
33    VariantDecimal16, VariantDecimalType,
34};
35use std::collections::HashMap;
36use std::ops::Range;
37
38// ============================================================================
39// Row-oriented builders for efficient Arrow-to-Variant conversion
40// ============================================================================
41
/// Row builder for converting Arrow arrays to VariantArray row by row
///
/// Each variant wraps the concrete builder for one Arrow array representation. The variant is
/// chosen once, up front, by [`make_arrow_to_variant_row_builder`]; afterwards
/// [`ArrowToVariantRowBuilder::append_row`] simply forwards each row to the wrapped builder.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null and boolean
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    // Integer and floating point primitives (one variant per Arrow primitive type)
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
    // Decimals: 32/64/128-bit share a generic builder paired with the matching Variant decimal
    // type; 256-bit has its own builder
    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Temporal types, one variant per (type, unit) combination
    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
    // Binary and string layouts (32-bit offsets, 64-bit offsets, views, fixed size)
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // List-like layouts all share one builder, generic over the concrete list array type
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    // Other nested and encoded layouts
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    // Run-end encoded arrays, one variant per supported run-end index type
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
}
91
92impl<'a> ArrowToVariantRowBuilder<'a> {
93    /// Appends a single row at the given index to the supplied builder.
94    pub fn append_row(
95        &mut self,
96        builder: &mut impl VariantBuilderExt,
97        index: usize,
98    ) -> Result<(), ArrowError> {
99        use ArrowToVariantRowBuilder::*;
100        match self {
101            Null(b) => b.append_row(builder, index),
102            Boolean(b) => b.append_row(builder, index),
103            PrimitiveInt8(b) => b.append_row(builder, index),
104            PrimitiveInt16(b) => b.append_row(builder, index),
105            PrimitiveInt32(b) => b.append_row(builder, index),
106            PrimitiveInt64(b) => b.append_row(builder, index),
107            PrimitiveUInt8(b) => b.append_row(builder, index),
108            PrimitiveUInt16(b) => b.append_row(builder, index),
109            PrimitiveUInt32(b) => b.append_row(builder, index),
110            PrimitiveUInt64(b) => b.append_row(builder, index),
111            PrimitiveFloat16(b) => b.append_row(builder, index),
112            PrimitiveFloat32(b) => b.append_row(builder, index),
113            PrimitiveFloat64(b) => b.append_row(builder, index),
114            Decimal32(b) => b.append_row(builder, index),
115            Decimal64(b) => b.append_row(builder, index),
116            Decimal128(b) => b.append_row(builder, index),
117            Decimal256(b) => b.append_row(builder, index),
118            TimestampSecond(b) => b.append_row(builder, index),
119            TimestampMillisecond(b) => b.append_row(builder, index),
120            TimestampMicrosecond(b) => b.append_row(builder, index),
121            TimestampNanosecond(b) => b.append_row(builder, index),
122            Date32(b) => b.append_row(builder, index),
123            Date64(b) => b.append_row(builder, index),
124            Time32Second(b) => b.append_row(builder, index),
125            Time32Millisecond(b) => b.append_row(builder, index),
126            Time64Microsecond(b) => b.append_row(builder, index),
127            Time64Nanosecond(b) => b.append_row(builder, index),
128            Binary(b) => b.append_row(builder, index),
129            LargeBinary(b) => b.append_row(builder, index),
130            BinaryView(b) => b.append_row(builder, index),
131            FixedSizeBinary(b) => b.append_row(builder, index),
132            Utf8(b) => b.append_row(builder, index),
133            LargeUtf8(b) => b.append_row(builder, index),
134            Utf8View(b) => b.append_row(builder, index),
135            List(b) => b.append_row(builder, index),
136            LargeList(b) => b.append_row(builder, index),
137            ListView(b) => b.append_row(builder, index),
138            LargeListView(b) => b.append_row(builder, index),
139            FixedSizeList(b) => b.append_row(builder, index),
140            Struct(b) => b.append_row(builder, index),
141            Map(b) => b.append_row(builder, index),
142            Union(b) => b.append_row(builder, index),
143            Dictionary(b) => b.append_row(builder, index),
144            RunEndEncodedInt16(b) => b.append_row(builder, index),
145            RunEndEncodedInt32(b) => b.append_row(builder, index),
146            RunEndEncodedInt64(b) => b.append_row(builder, index),
147        }
148    }
149}
150
151/// Factory function to create the appropriate row builder for a given DataType
152pub(crate) fn make_arrow_to_variant_row_builder<'a>(
153    data_type: &'a DataType,
154    array: &'a dyn Array,
155    options: &'a CastOptions,
156) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
157    use ArrowToVariantRowBuilder::*;
158    let builder =
159        match data_type {
160            DataType::Null => Null(NullArrowToVariantBuilder),
161            DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
162            DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
163            DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
164            DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
165            DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
166            DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
167            DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
168            DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
169            DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
170            DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
171            DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
172            DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
173            DataType::Decimal32(_, scale) => {
174                Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
175            }
176            DataType::Decimal64(_, scale) => {
177                Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
178            }
179            DataType::Decimal128(_, scale) => {
180                Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
181            }
182            DataType::Decimal256(_, scale) => {
183                Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
184            }
185            DataType::Timestamp(time_unit, time_zone) => {
186                match time_unit {
187                    TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
188                        array,
189                        options,
190                        time_zone.is_some(),
191                    )),
192                    TimeUnit::Millisecond => TimestampMillisecond(
193                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
194                    ),
195                    TimeUnit::Microsecond => TimestampMicrosecond(
196                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
197                    ),
198                    TimeUnit::Nanosecond => TimestampNanosecond(
199                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
200                    ),
201                }
202            }
203            DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
204            DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
205            DataType::Time32(time_unit) => match time_unit {
206                TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
207                TimeUnit::Millisecond => {
208                    Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
209                }
210                _ => {
211                    return Err(ArrowError::CastError(format!(
212                        "Unsupported Time32 unit: {time_unit:?}"
213                    )));
214                }
215            },
216            DataType::Time64(time_unit) => match time_unit {
217                TimeUnit::Microsecond => {
218                    Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
219                }
220                TimeUnit::Nanosecond => {
221                    Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
222                }
223                _ => {
224                    return Err(ArrowError::CastError(format!(
225                        "Unsupported Time64 unit: {time_unit:?}"
226                    )));
227                }
228            },
229            DataType::Duration(_) | DataType::Interval(_) => {
230                return Err(ArrowError::InvalidArgumentError(
231                    "Casting duration/interval types to Variant is not supported. \
232                    The Variant format does not define duration/interval types."
233                        .to_string(),
234                ));
235            }
236            DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
237            DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
238            DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
239            DataType::FixedSizeBinary(_) => {
240                FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
241            }
242            DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
243            DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
244            DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
245            DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
246            DataType::LargeList(_) => {
247                LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
248            }
249            DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
250                array.as_list_view(),
251                options,
252            )?),
253            DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
254                array.as_list_view(),
255                options,
256            )?),
257            DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
258                array.as_fixed_size_list(),
259                options,
260            )?),
261            DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
262                array.as_struct(),
263                options,
264            )?),
265            DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
266            DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
267            DataType::Dictionary(_, _) => {
268                Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
269            }
270            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
271                DataType::Int16 => {
272                    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
273                }
274                DataType::Int32 => {
275                    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
276                }
277                DataType::Int64 => {
278                    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
279                }
280                _ => {
281                    return Err(ArrowError::CastError(format!(
282                        "Unsupported run ends type: {}",
283                        run_ends.data_type()
284                    )));
285                }
286            },
287        };
288    Ok(builder)
289}
290
/// Macro to define (possibly generic) row builders with consistent structure and behavior.
///
/// Every generated builder stores a reference to a concrete (downcast) array and exposes
/// `new(array, <extra fields>...)` plus `append_row(builder, index)`. `append_row` appends null
/// for null slots; otherwise it reads the value at `index`, applies the optional value transform
/// and appends the result.
///
/// The macro optionally allows defining a transform for values read from the underlying
/// array. Transforms of the form `|value| { ... }` are infallible (and should produce something
/// that implements `Into<Variant>`), while transforms of the form `|value| -> Option<_> { ... }`
/// are fallible (and should produce `Option<impl Into<Variant>>`); a failed transform will either
/// append `Variant::Null` to the builder or return an error, depending on cast options
/// (`options.strict`).
///
/// Also supports optional extra fields that are passed to the constructor and which are available
/// by reference in the value transform. Providing a fallible value transform requires also
/// providing the extra field `options: &'a CastOptions`.
// TODO: If/when the macro_metavar_expr feature stabilizes, the `ignore` meta-function would allow
// us to "use" captured tokens without emitting them:
//
// ```
// $(
//     ${ignore($value)}
//     $(
//         ${ignore($option_ty)}
//         options: &$lifetime CastOptions,
//     )?
// )?
// ```
//
// That, in turn, would allow us to inject the `options` field whenever the user specifies a
// fallible value transform, instead of requiring them to manually define it. This might not be
// worth the trouble, tho, because it makes for some pretty bulky and unwieldy macro expansions.
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            // The downcast array that rows are read from
            array: &$lifetime $array_type,
            // Caller-declared extra fields (e.g. `options`, `scale`)
            $( $( $field: $field_type, )+ )?
            _phantom: std::marker::PhantomData<($( $generic, )*)>, // capture all type params
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            // Constructor: takes the type-erased array (downcast by `$init_expr`) followed by the
            // extra fields in declaration order.
            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            // Appends the row at `index` to `builder`: null slots become nulls, everything else
            // goes through the optional value transform first.
            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Macro hygiene: Give any extra fields names the value transform can access.
                    //
                    // The value transform doesn't normally reference cast options, but the macro's
                    // caller still has to declare the field because stable rust has no way to "use"
                    // a captured token without emitting it. So, silence unused variable warnings,
                    // assuming that's the `options` field. Unfortunately, that also silences
                    // legitimate compiler warnings if an infallible value transform fails to use
                    // its first extra field.
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    // Apply the value transform, if any (with name swapping for hygiene)
                    let value = self.array.value(index);
                    $(
                        // Rebind the raw value under the caller's chosen name so the transform
                        // expression can see it, then shadow `value` with the transform's result.
                        let $value = value;
                        let value = $value_transform;
                        $(
                            // NOTE: The `?` macro expansion fails without the type annotation.
                            let Some(value): Option<$option_ty> = value else {
                                if self.options.strict {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    // Overflow is encoded as Variant::Null,
                                    // distinct from None indicating a missing value
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
390
// Row builder for boolean arrays: the array is downcast with `as_boolean()` and, since no value
// transform is given, each non-null value is appended to the Variant builder as-is.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
395
// Row builder shared by all integer and floating point primitive arrays. No value transform is
// given: the `where` clause requires the native value type to convert into `Variant` directly,
// so each non-null value is appended as-is.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
401
// Row builder for decimal arrays whose native type matches a Variant decimal type `V`
// (the `where` clause ties `V::Native` to `A::Native`).
//
// The value transform is fallible: each value is wrapped via `V::try_new_with_signed_scale`
// using the column's `scale`. When that fails, the macro either returns an error (strict cast
// options) or appends `Variant::Null`.
define_row_builder!(
    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
    where
        V: VariantDecimalType<Native = A::Native>,
    {
        options: &'a CastOptions,
        scale: i8,
    },
    |array| -> PrimitiveArray<A> { array.as_primitive() },
    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
);
413
414// Decimal256 needs a two-stage conversion via i128
415define_row_builder!(
416    struct Decimal256ArrowToVariantBuilder<'a> {
417        options: &'a CastOptions,
418        scale: i8,
419    },
420    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
421    |value| -> Option<_> {
422        let value = value.to_i128();
423        value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
424    }
425);
426
427define_row_builder!(
428    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
429        options: &'a CastOptions,
430        has_time_zone: bool,
431    },
432    |array| -> PrimitiveArray<T> { array.as_primitive() },
433    |value| -> Option<_> {
434        // Convert using Arrow's temporal conversion functions
435        as_datetime::<T>(value).map(|naive_datetime| {
436            if *has_time_zone {
437                // Has timezone -> DateTime<Utc> -> TimestampMicros/TimestampNanos
438                let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
439                Variant::from(utc_dt) // Uses From<DateTime<Utc>> for Variant
440            } else {
441                // No timezone -> NaiveDateTime -> TimestampNtzMicros/TimestampNtzNanos
442                Variant::from(naive_datetime) // Uses From<NaiveDateTime> for Variant
443            }
444        })
445    }
446);
447
448define_row_builder!(
449    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
450    where
451        i64: From<T::Native>,
452    {
453        options: &'a CastOptions,
454    },
455    |array| -> PrimitiveArray<T> { array.as_primitive() },
456    |value| -> Option<_> {
457        let date_value = i64::from(value);
458        as_date::<T>(date_value)
459    }
460);
461
462define_row_builder!(
463    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
464    where
465        i64: From<T::Native>,
466    {
467        options: &'a CastOptions,
468    },
469    |array| -> PrimitiveArray<T> { array.as_primitive() },
470    |value| -> Option<_> {
471        let time_value = i64::from(value);
472        as_time::<T>(time_value)
473    }
474);
475
// Row builder for offset-based binary arrays; `O` selects the offset width (the factory uses
// i32 for Binary and i64 for LargeBinary). No value transform: values are appended as-is.
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);
480
// Row builder for binary view arrays. No value transform: values are appended as-is.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);
485
// Row builder for fixed-size binary arrays. No value transform: values are appended as-is.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
490
// Row builder for offset-based string arrays; `O` selects the offset width (the factory uses
// i32 for Utf8 and i64 for LargeUtf8). No value transform: values are appended as-is.
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);
495
// Row builder for string view arrays. No value transform: values are appended as-is.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
500
/// Null builder that always appends null
///
/// Selected by the factory for `DataType::Null`. It holds no reference to the source array
/// because no value is ever read from it.
pub(crate) struct NullArrowToVariantBuilder;

impl NullArrowToVariantBuilder {
    /// Appends a null to `builder`, regardless of `_index`. Always returns `Ok(())`.
    ///
    /// The signature mirrors the other row builders so the enum dispatch can treat all
    /// builders uniformly.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        _index: usize,
    ) -> Result<(), ArrowError> {
        builder.append_null();
        Ok(())
    }
}
514
515/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
516/// and FixedSizeList
517pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
518    list_array: &'a L,
519    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
520}
521
522impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
523    pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
524        let values = array.values();
525        let values_builder =
526            make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
527
528        Ok(Self {
529            list_array: array,
530            values_builder: Box::new(values_builder),
531        })
532    }
533
534    fn append_row(
535        &mut self,
536        builder: &mut impl VariantBuilderExt,
537        index: usize,
538    ) -> Result<(), ArrowError> {
539        if self.list_array.is_null(index) {
540            builder.append_null();
541            return Ok(());
542        }
543
544        let range = self.list_array.element_range(index);
545
546        let mut list_builder = builder.try_new_list()?;
547        for value_index in range {
548            self.values_builder
549                .append_row(&mut list_builder, value_index)?;
550        }
551        list_builder.finish();
552        Ok(())
553    }
554}
555
/// Trait for list-like arrays that can provide element ranges
///
/// This lets a single list row builder handle every list layout: it only needs the child
/// values array and, per row, the range of child positions belonging to that row.
pub(crate) trait ListLikeArray: Array {
    /// Get the values array
    fn values(&self) -> &dyn Array;

    /// Get the start and end indices for a list element
    ///
    /// The returned range indexes into the array returned by [`ListLikeArray::values`].
    fn element_range(&self, index: usize) -> Range<usize>;
}
564
565impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
566    fn values(&self) -> &dyn Array {
567        self.values()
568    }
569
570    fn element_range(&self, index: usize) -> Range<usize> {
571        let offsets = self.offsets();
572        let start = offsets[index].as_usize();
573        let end = offsets[index + 1].as_usize();
574        start..end
575    }
576}
577
578impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
579    fn values(&self) -> &dyn Array {
580        self.values()
581    }
582
583    fn element_range(&self, index: usize) -> Range<usize> {
584        let offsets = self.value_offsets();
585        let sizes = self.value_sizes();
586        let offset = offsets[index].as_usize();
587        let size = sizes[index].as_usize();
588        offset..(offset + size)
589    }
590}
591
592impl ListLikeArray for FixedSizeListArray {
593    fn values(&self) -> &dyn Array {
594        self.values()
595    }
596
597    fn element_range(&self, index: usize) -> Range<usize> {
598        let value_length = self.value_length().as_usize();
599        let offset = index * value_length;
600        offset..(offset + value_length)
601    }
602}
603
604/// Struct builder for StructArray
605pub(crate) struct StructArrowToVariantBuilder<'a> {
606    struct_array: &'a arrow::array::StructArray,
607    field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
608}
609
610impl<'a> StructArrowToVariantBuilder<'a> {
611    pub(crate) fn new(
612        struct_array: &'a arrow::array::StructArray,
613        options: &'a CastOptions,
614    ) -> Result<Self, ArrowError> {
615        let mut field_builders = Vec::new();
616
617        // Create a row builder for each field
618        for (field_name, field_array) in struct_array
619            .column_names()
620            .iter()
621            .zip(struct_array.columns().iter())
622        {
623            let field_builder = make_arrow_to_variant_row_builder(
624                field_array.data_type(),
625                field_array.as_ref(),
626                options,
627            )?;
628            field_builders.push((*field_name, field_builder));
629        }
630
631        Ok(Self {
632            struct_array,
633            field_builders,
634        })
635    }
636
637    fn append_row(
638        &mut self,
639        builder: &mut impl VariantBuilderExt,
640        index: usize,
641    ) -> Result<(), ArrowError> {
642        if self.struct_array.is_null(index) {
643            builder.append_null();
644        } else {
645            // Create object builder for this struct row
646            let mut obj_builder = builder.try_new_object()?;
647
648            // Process each field
649            for (field_name, row_builder) in &mut self.field_builders {
650                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
651                row_builder.append_row(&mut field_builder, index)?;
652            }
653
654            obj_builder.finish();
655        }
656        Ok(())
657    }
658}
659
660/// Map builder for MapArray types
661pub(crate) struct MapArrowToVariantBuilder<'a> {
662    map_array: &'a arrow::array::MapArray,
663    key_strings: arrow::array::StringArray,
664    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
665}
666
667impl<'a> MapArrowToVariantBuilder<'a> {
668    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
669        let map_array = array.as_map();
670
671        // Pre-cast keys to strings once
672        let keys = cast(map_array.keys(), &DataType::Utf8)?;
673        let key_strings = keys.as_string::<i32>().clone();
674
675        // Create recursive builder for values
676        let values = map_array.values();
677        let values_builder =
678            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
679
680        Ok(Self {
681            map_array,
682            key_strings,
683            values_builder: Box::new(values_builder),
684        })
685    }
686
687    fn append_row(
688        &mut self,
689        builder: &mut impl VariantBuilderExt,
690        index: usize,
691    ) -> Result<(), ArrowError> {
692        // Check for NULL map first (via null bitmap)
693        if self.map_array.is_null(index) {
694            builder.append_null();
695            return Ok(());
696        }
697
698        let offsets = self.map_array.offsets();
699        let start = offsets[index].as_usize();
700        let end = offsets[index + 1].as_usize();
701
702        // Create object builder for this map
703        let mut object_builder = builder.try_new_object()?;
704
705        // Add each key-value pair (loop does nothing for empty maps - correct!)
706        for kv_index in start..end {
707            let key = self.key_strings.value(kv_index);
708            let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
709            self.values_builder
710                .append_row(&mut field_builder, kv_index)?;
711        }
712
713        object_builder.finish();
714        Ok(())
715    }
716}
717
718/// Union builder for both sparse and dense union arrays
719///
720/// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders.
721pub(crate) struct UnionArrowToVariantBuilder<'a> {
722    union_array: &'a arrow::array::UnionArray,
723    child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
724}
725
726impl<'a> UnionArrowToVariantBuilder<'a> {
727    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
728        let union_array = array.as_union();
729        let type_ids = union_array.type_ids();
730
731        // Create child builders for each union field
732        let mut child_builders = HashMap::new();
733        for &type_id in type_ids {
734            let child_array = union_array.child(type_id);
735            let child_builder = make_arrow_to_variant_row_builder(
736                child_array.data_type(),
737                child_array.as_ref(),
738                options,
739            )?;
740            child_builders.insert(type_id, Box::new(child_builder));
741        }
742
743        Ok(Self {
744            union_array,
745            child_builders,
746        })
747    }
748
749    fn append_row(
750        &mut self,
751        builder: &mut impl VariantBuilderExt,
752        index: usize,
753    ) -> Result<(), ArrowError> {
754        let type_id = self.union_array.type_id(index);
755        let value_offset = self.union_array.value_offset(index);
756
757        // Delegate to the appropriate child builder, or append null to handle an invalid type_id
758        match self.child_builders.get_mut(&type_id) {
759            Some(child_builder) => child_builder.append_row(builder, value_offset)?,
760            None => builder.append_null(),
761        }
762
763        Ok(())
764    }
765}
766
767/// Dictionary array builder with simple O(1) indexing
768pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
769    keys: &'a dyn Array, // only needed for null checks
770    normalized_keys: Vec<usize>,
771    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
772}
773
774impl<'a> DictionaryArrowToVariantBuilder<'a> {
775    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
776        let dict_array = array.as_any_dictionary();
777        let values = dict_array.values();
778        let values_builder =
779            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
780
781        // WARNING: normalized_keys panics if values is empty
782        let normalized_keys = match values.len() {
783            0 => Vec::new(),
784            _ => dict_array.normalized_keys(),
785        };
786
787        Ok(Self {
788            keys: dict_array.keys(),
789            normalized_keys,
790            values_builder: Box::new(values_builder),
791        })
792    }
793
794    fn append_row(
795        &mut self,
796        builder: &mut impl VariantBuilderExt,
797        index: usize,
798    ) -> Result<(), ArrowError> {
799        if self.keys.is_null(index) {
800            builder.append_null();
801        } else {
802            let normalized_key = self.normalized_keys[index];
803            self.values_builder.append_row(builder, normalized_key)?;
804        }
805        Ok(())
806    }
807}
808
809/// Run-end encoded array builder with efficient sequential access
810pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
811    run_array: &'a arrow::array::RunArray<R>,
812    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
813
814    run_ends: &'a [R::Native],
815    run_number: usize, // Physical index into run_ends and values
816    run_start: usize,  // Logical start index of current run
817}
818
819impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
820    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
821        let Some(run_array) = array.as_run_opt() else {
822            return Err(ArrowError::CastError("Expected RunArray".to_string()));
823        };
824
825        let values = run_array.values();
826        let values_builder =
827            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
828
829        Ok(Self {
830            run_array,
831            values_builder: Box::new(values_builder),
832            run_ends: run_array.run_ends().values(),
833            run_number: 0,
834            run_start: 0,
835        })
836    }
837
838    fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
839        if index >= self.run_start {
840            let Some(run_end) = self.run_ends.get(self.run_number) else {
841                return Err(ArrowError::CastError(format!(
842                    "Index {index} beyond run array"
843                )));
844            };
845            if index < run_end.as_usize() {
846                return Ok(());
847            }
848            if index == run_end.as_usize() {
849                self.run_number += 1;
850                self.run_start = run_end.as_usize();
851                return Ok(());
852            }
853        }
854
855        // Use partition_point for all non-sequential cases
856        let run_number = self
857            .run_ends
858            .partition_point(|&run_end| run_end.as_usize() <= index);
859        if run_number >= self.run_ends.len() {
860            return Err(ArrowError::CastError(format!(
861                "Index {index} beyond run array"
862            )));
863        }
864        self.run_number = run_number;
865        self.run_start = match run_number {
866            0 => 0,
867            _ => self.run_ends[run_number - 1].as_usize(),
868        };
869        Ok(())
870    }
871
872    fn append_row(
873        &mut self,
874        builder: &mut impl VariantBuilderExt,
875        index: usize,
876    ) -> Result<(), ArrowError> {
877        self.set_run_for_index(index)?;
878
879        // Handle null values
880        if self.run_array.values().is_null(self.run_number) {
881            builder.append_null();
882            return Ok(());
883        }
884
885        // Re-encode the value
886        self.values_builder.append_row(builder, self.run_number)?;
887
888        Ok(())
889    }
890}
891
892#[cfg(test)]
893mod tests {
894    use super::*;
895    use crate::{VariantArray, VariantArrayBuilder};
896    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
897    use arrow::datatypes::Int32Type;
898    use std::sync::Arc;
899
900    /// Builds a VariantArray from an Arrow array using the row builder.
901    fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
902        execute_row_builder_test_with_options(array, CastOptions::default())
903    }
904
905    /// Variant of `execute_row_builder_test` that allows specifying options
906    fn execute_row_builder_test_with_options(
907        array: &dyn Array,
908        options: CastOptions,
909    ) -> VariantArray {
910        let mut row_builder =
911            make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
912
913        let mut array_builder = VariantArrayBuilder::new(array.len());
914
915        // The repetitive loop that appears in every test
916        for i in 0..array.len() {
917            row_builder.append_row(&mut array_builder, i).unwrap();
918        }
919
920        let variant_array = array_builder.build();
921        assert_eq!(variant_array.len(), array.len());
922        variant_array
923    }
924
925    /// Generic helper function to test row builders with basic assertion patterns.
926    /// Uses execute_row_builder_test and adds simple value comparison assertions.
927    fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
928        test_row_builder_basic_with_options(array, expected_values, CastOptions::default());
929    }
930
931    /// Variant of `test_row_builder_basic` that allows specifying options
932    fn test_row_builder_basic_with_options(
933        array: &dyn Array,
934        expected_values: Vec<Option<Variant>>,
935        options: CastOptions,
936    ) {
937        let variant_array = execute_row_builder_test_with_options(array, options);
938
939        // The repetitive assertion pattern
940        for (i, expected) in expected_values.iter().enumerate() {
941            match expected {
942                Some(variant) => {
943                    assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
944                }
945                None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
946            }
947        }
948    }
949
950    #[test]
951    fn test_primitive_row_builder() {
952        let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
953        test_row_builder_basic(
954            &int_array,
955            vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
956        );
957    }
958
959    #[test]
960    fn test_string_row_builder() {
961        let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
962        test_row_builder_basic(
963            &string_array,
964            vec![
965                Some(Variant::from("hello")),
966                None,
967                Some(Variant::from("world")),
968            ],
969        );
970    }
971
972    #[test]
973    fn test_boolean_row_builder() {
974        let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
975        test_row_builder_basic(
976            &bool_array,
977            vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
978        );
979    }
980
981    #[test]
982    fn test_struct_row_builder() {
983        use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
984        use arrow_schema::{DataType, Field};
985        use std::sync::Arc;
986
987        // Create a struct array with int and string fields
988        let int_field = Field::new("id", DataType::Int32, true);
989        let string_field = Field::new("name", DataType::Utf8, true);
990
991        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
992        let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
993
994        let struct_array = StructArray::try_new(
995            vec![int_field, string_field].into(),
996            vec![
997                Arc::new(int_array) as ArrayRef,
998                Arc::new(string_array) as ArrayRef,
999            ],
1000            None,
1001        )
1002        .unwrap();
1003
1004        let variant_array = execute_row_builder_test(&struct_array);
1005
1006        // Check first row - should have both fields
1007        let first_variant = variant_array.value(0);
1008        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1009        assert_eq!(
1010            first_variant.get_object_field("name"),
1011            Some(Variant::from("Alice"))
1012        );
1013
1014        // Check second row - should have name field but not id (null field omitted)
1015        let second_variant = variant_array.value(1);
1016        assert_eq!(second_variant.get_object_field("id"), None); // null field omitted
1017        assert_eq!(
1018            second_variant.get_object_field("name"),
1019            Some(Variant::from("Bob"))
1020        );
1021
1022        // Check third row - should have id field but not name (null field omitted)
1023        let third_variant = variant_array.value(2);
1024        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
1025        assert_eq!(third_variant.get_object_field("name"), None); // null field omitted
1026    }
1027
1028    #[test]
1029    fn test_run_end_encoded_row_builder() {
1030        use arrow::array::{Int32Array, RunArray};
1031        use arrow::datatypes::Int32Type;
1032
1033        // Create a run-end encoded array: [A, A, B, B, B, C]
1034        // run_ends: [2, 5, 6]
1035        // values: ["A", "B", "C"]
1036        let values = StringArray::from(vec!["A", "B", "C"]);
1037        let run_ends = Int32Array::from(vec![2, 5, 6]);
1038        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1039
1040        let variant_array = execute_row_builder_test(&run_array);
1041
1042        // Verify the values
1043        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1044        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1045        assert_eq!(variant_array.value(2), Variant::from("B")); // Run 1
1046        assert_eq!(variant_array.value(3), Variant::from("B")); // Run 1
1047        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 1
1048        assert_eq!(variant_array.value(5), Variant::from("C")); // Run 2
1049    }
1050
1051    #[test]
1052    fn test_run_end_encoded_random_access() {
1053        use arrow::array::{Int32Array, RunArray};
1054        use arrow::datatypes::Int32Type;
1055
1056        // Create a run-end encoded array: [A, A, B, B, B, C]
1057        let values = StringArray::from(vec!["A", "B", "C"]);
1058        let run_ends = Int32Array::from(vec![2, 5, 6]);
1059        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1060
1061        let options = CastOptions::default();
1062        let mut row_builder =
1063            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1064
1065        // Test random access pattern (backward jumps, forward jumps)
1066        let access_pattern = [0, 5, 2, 4, 1, 3]; // Mix of all cases
1067        let expected_values = ["A", "C", "B", "B", "A", "B"];
1068
1069        for (i, &index) in access_pattern.iter().enumerate() {
1070            let mut array_builder = VariantArrayBuilder::new(1);
1071            row_builder.append_row(&mut array_builder, index).unwrap();
1072            let variant_array = array_builder.build();
1073            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1074        }
1075    }
1076
1077    #[test]
1078    fn test_run_end_encoded_with_nulls() {
1079        use arrow::array::{Int32Array, RunArray};
1080        use arrow::datatypes::Int32Type;
1081
1082        // Create a run-end encoded array with null values: [A, A, null, null, B]
1083        let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1084        let run_ends = Int32Array::from(vec![2, 4, 5]);
1085        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1086
1087        let options = CastOptions::default();
1088        let mut row_builder =
1089            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1090        let mut array_builder = VariantArrayBuilder::new(5);
1091
1092        // Test sequential access
1093        for i in 0..5 {
1094            row_builder.append_row(&mut array_builder, i).unwrap();
1095        }
1096
1097        let variant_array = array_builder.build();
1098        assert_eq!(variant_array.len(), 5);
1099
1100        // Verify the values
1101        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1102        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1103        assert!(variant_array.is_null(2)); // Run 1 (null)
1104        assert!(variant_array.is_null(3)); // Run 1 (null)
1105        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 2
1106    }
1107
1108    #[test]
1109    fn test_dictionary_row_builder() {
1110        use arrow::array::{DictionaryArray, Int32Array};
1111        use arrow::datatypes::Int32Type;
1112
1113        // Create a dictionary array: keys=[0, 1, 0, 2, 1], values=["apple", "banana", "cherry"]
1114        let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1115        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1116        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1117
1118        let variant_array = execute_row_builder_test(&dict_array);
1119
1120        // Verify the values match the dictionary lookup
1121        assert_eq!(variant_array.value(0), Variant::from("apple")); // keys[0] = 0 -> values[0] = "apple"
1122        assert_eq!(variant_array.value(1), Variant::from("banana")); // keys[1] = 1 -> values[1] = "banana"
1123        assert_eq!(variant_array.value(2), Variant::from("apple")); // keys[2] = 0 -> values[0] = "apple"
1124        assert_eq!(variant_array.value(3), Variant::from("cherry")); // keys[3] = 2 -> values[2] = "cherry"
1125        assert_eq!(variant_array.value(4), Variant::from("banana")); // keys[4] = 1 -> values[1] = "banana"
1126    }
1127
1128    #[test]
1129    fn test_dictionary_with_nulls() {
1130        use arrow::array::{DictionaryArray, Int32Array};
1131        use arrow::datatypes::Int32Type;
1132
1133        // Create a dictionary array with null keys: keys=[0, null, 1, null, 2], values=["x", "y", "z"]
1134        let values = StringArray::from(vec!["x", "y", "z"]);
1135        let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1136        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1137
1138        let options = CastOptions::default();
1139        let mut row_builder =
1140            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1141                .unwrap();
1142        let mut array_builder = VariantArrayBuilder::new(5);
1143
1144        // Test sequential access
1145        for i in 0..5 {
1146            row_builder.append_row(&mut array_builder, i).unwrap();
1147        }
1148
1149        let variant_array = array_builder.build();
1150        assert_eq!(variant_array.len(), 5);
1151
1152        // Verify the values and nulls
1153        assert_eq!(variant_array.value(0), Variant::from("x")); // keys[0] = 0 -> values[0] = "x"
1154        assert!(variant_array.is_null(1)); // keys[1] = null
1155        assert_eq!(variant_array.value(2), Variant::from("y")); // keys[2] = 1 -> values[1] = "y"
1156        assert!(variant_array.is_null(3)); // keys[3] = null
1157        assert_eq!(variant_array.value(4), Variant::from("z")); // keys[4] = 2 -> values[2] = "z"
1158    }
1159
1160    #[test]
1161    fn test_dictionary_random_access() {
1162        use arrow::array::{DictionaryArray, Int32Array};
1163        use arrow::datatypes::Int32Type;
1164
1165        // Create a dictionary array: keys=[0, 1, 2, 0, 1, 2], values=["red", "green", "blue"]
1166        let values = StringArray::from(vec!["red", "green", "blue"]);
1167        let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1168        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1169
1170        let options = CastOptions::default();
1171        let mut row_builder =
1172            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1173                .unwrap();
1174
1175        // Test random access pattern
1176        let access_pattern = [5, 0, 3, 1, 4, 2]; // Random order
1177        let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1178
1179        for (i, &index) in access_pattern.iter().enumerate() {
1180            let mut array_builder = VariantArrayBuilder::new(1);
1181            row_builder.append_row(&mut array_builder, index).unwrap();
1182            let variant_array = array_builder.build();
1183            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1184        }
1185    }
1186
1187    #[test]
1188    fn test_nested_dictionary() {
1189        use arrow::array::{DictionaryArray, Int32Array, StructArray};
1190        use arrow::datatypes::{Field, Int32Type};
1191
1192        // Create a dictionary with struct values
1193        let id_array = Int32Array::from(vec![1, 2, 3]);
1194        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1195        let struct_array = StructArray::from(vec![
1196            (
1197                Arc::new(Field::new("id", DataType::Int32, false)),
1198                Arc::new(id_array) as ArrayRef,
1199            ),
1200            (
1201                Arc::new(Field::new("name", DataType::Utf8, false)),
1202                Arc::new(name_array) as ArrayRef,
1203            ),
1204        ]);
1205
1206        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1207        let dict_array =
1208            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1209
1210        let options = CastOptions::default();
1211        let mut row_builder =
1212            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1213                .unwrap();
1214        let mut array_builder = VariantArrayBuilder::new(5);
1215
1216        // Test sequential access
1217        for i in 0..5 {
1218            row_builder.append_row(&mut array_builder, i).unwrap();
1219        }
1220
1221        let variant_array = array_builder.build();
1222        assert_eq!(variant_array.len(), 5);
1223
1224        // Verify the nested struct values
1225        let first_variant = variant_array.value(0);
1226        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1227        assert_eq!(
1228            first_variant.get_object_field("name"),
1229            Some(Variant::from("Alice"))
1230        );
1231
1232        let second_variant = variant_array.value(1);
1233        assert_eq!(
1234            second_variant.get_object_field("id"),
1235            Some(Variant::from(2))
1236        );
1237        assert_eq!(
1238            second_variant.get_object_field("name"),
1239            Some(Variant::from("Bob"))
1240        );
1241
1242        // Test that repeated keys give same values
1243        let third_variant = variant_array.value(2);
1244        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1245        assert_eq!(
1246            third_variant.get_object_field("name"),
1247            Some(Variant::from("Alice"))
1248        );
1249    }
1250
1251    #[test]
1252    fn test_list_row_builder() {
1253        use arrow::array::ListArray;
1254
1255        // Create a list array: [[1, 2], [3, 4, 5], null, []]
1256        let data = vec![
1257            Some(vec![Some(1), Some(2)]),
1258            Some(vec![Some(3), Some(4), Some(5)]),
1259            None,
1260            Some(vec![]),
1261        ];
1262        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1263
1264        let variant_array = execute_row_builder_test(&list_array);
1265
1266        // Row 0: [1, 2]
1267        let row0 = variant_array.value(0);
1268        let list0 = row0.as_list().unwrap();
1269        assert_eq!(list0.len(), 2);
1270        assert_eq!(list0.get(0), Some(Variant::from(1)));
1271        assert_eq!(list0.get(1), Some(Variant::from(2)));
1272
1273        // Row 1: [3, 4, 5]
1274        let row1 = variant_array.value(1);
1275        let list1 = row1.as_list().unwrap();
1276        assert_eq!(list1.len(), 3);
1277        assert_eq!(list1.get(0), Some(Variant::from(3)));
1278        assert_eq!(list1.get(1), Some(Variant::from(4)));
1279        assert_eq!(list1.get(2), Some(Variant::from(5)));
1280
1281        // Row 2: null
1282        assert!(variant_array.is_null(2));
1283
1284        // Row 3: []
1285        let row3 = variant_array.value(3);
1286        let list3 = row3.as_list().unwrap();
1287        assert_eq!(list3.len(), 0);
1288    }
1289
1290    #[test]
1291    fn test_sliced_list_row_builder() {
1292        use arrow::array::ListArray;
1293
1294        // Create a list array: [[1, 2], [3, 4, 5], [6]]
1295        let data = vec![
1296            Some(vec![Some(1), Some(2)]),
1297            Some(vec![Some(3), Some(4), Some(5)]),
1298            Some(vec![Some(6)]),
1299        ];
1300        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1301
1302        // Slice to get just the middle element: [[3, 4, 5]]
1303        let sliced_array = list_array.slice(1, 1);
1304
1305        let options = CastOptions::default();
1306        let mut row_builder =
1307            make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1308                .unwrap();
1309        let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1310
1311        // Test the single row
1312        row_builder
1313            .append_row(&mut variant_array_builder, 0)
1314            .unwrap();
1315        let variant_array = variant_array_builder.build();
1316
1317        // Verify result
1318        assert_eq!(variant_array.len(), 1);
1319
1320        // Row 0: [3, 4, 5]
1321        let row0 = variant_array.value(0);
1322        let list0 = row0.as_list().unwrap();
1323        assert_eq!(list0.len(), 3);
1324        assert_eq!(list0.get(0), Some(Variant::from(3)));
1325        assert_eq!(list0.get(1), Some(Variant::from(4)));
1326        assert_eq!(list0.get(2), Some(Variant::from(5)));
1327    }
1328
1329    #[test]
1330    fn test_nested_list_row_builder() {
1331        use arrow::array::ListArray;
1332        use arrow::datatypes::Field;
1333
1334        // Build the nested structure manually
1335        let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1336        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1337
1338        let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1339        let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1340
1341        let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1342        let outer_list = ListArray::new(
1343            inner_list_field,
1344            outer_offsets,
1345            Arc::new(values_list),
1346            Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1347        );
1348
1349        let options = CastOptions::default();
1350        let mut row_builder =
1351            make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1352                .unwrap();
1353        let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1354
1355        for i in 0..outer_list.len() {
1356            row_builder
1357                .append_row(&mut variant_array_builder, i)
1358                .unwrap();
1359        }
1360
1361        let variant_array = variant_array_builder.build();
1362
1363        // Verify results
1364        assert_eq!(variant_array.len(), 2);
1365
1366        // Row 0: [[1, 2], [3]]
1367        let row0 = variant_array.value(0);
1368        let outer_list0 = row0.as_list().unwrap();
1369        assert_eq!(outer_list0.len(), 2);
1370
1371        let inner_list0_0 = outer_list0.get(0).unwrap();
1372        let inner_list0_0 = inner_list0_0.as_list().unwrap();
1373        assert_eq!(inner_list0_0.len(), 2);
1374        assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1375        assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1376
1377        let inner_list0_1 = outer_list0.get(1).unwrap();
1378        let inner_list0_1 = inner_list0_1.as_list().unwrap();
1379        assert_eq!(inner_list0_1.len(), 1);
1380        assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1381
1382        // Row 1: null
1383        assert!(variant_array.is_null(1));
1384    }
1385
1386    #[test]
1387    fn test_map_row_builder() {
1388        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1389        use arrow::buffer::{NullBuffer, OffsetBuffer};
1390        use arrow::datatypes::{DataType, Field, Fields};
1391        use std::sync::Arc;
1392
1393        // Create the entries struct array (key-value pairs)
1394        let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1395        let values = Int32Array::from(vec![1, 2, 3]);
1396        let entries_fields = Fields::from(vec![
1397            Field::new("key", DataType::Utf8, false),
1398            Field::new("value", DataType::Int32, true),
1399        ]);
1400        let entries = StructArray::new(
1401            entries_fields.clone(),
1402            vec![Arc::new(keys), Arc::new(values)],
1403            None, // No nulls in the entries themselves
1404        );
1405
1406        // Create offsets for 4 maps: [0..1], [1..1], [1..1], [1..3]
1407        // Map 0: {"key1": 1}    (1 entry)
1408        // Map 1: {}             (0 entries - empty)
1409        // Map 2: null           (0 entries but NULL via null buffer)
1410        // Map 3: {"key2": 2, "key3": 3}  (2 entries)
1411        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1412
1413        // Create null buffer - map at index 2 is NULL
1414        let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1415
1416        // Create the map field
1417        let map_field = Arc::new(Field::new(
1418            "entries",
1419            DataType::Struct(entries_fields),
1420            false, // Keys are non-nullable
1421        ));
1422
1423        // Create MapArray using try_new
1424        let map_array = MapArray::try_new(
1425            map_field,
1426            offsets,
1427            entries,
1428            null_buffer,
1429            false, // not ordered
1430        )
1431        .unwrap();
1432
1433        let variant_array = execute_row_builder_test(&map_array);
1434
1435        // Map 0: {"key1": 1}
1436        let map0 = variant_array.value(0);
1437        let obj0 = map0.as_object().unwrap();
1438        assert_eq!(obj0.len(), 1);
1439        assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1440
1441        // Map 1: {} (empty object, not null)
1442        let map1 = variant_array.value(1);
1443        let obj1 = map1.as_object().unwrap();
1444        assert_eq!(obj1.len(), 0); // Empty object
1445
1446        // Map 2: null (actual NULL)
1447        assert!(variant_array.is_null(2));
1448
1449        // Map 3: {"key2": 2, "key3": 3}
1450        let map3 = variant_array.value(3);
1451        let obj3 = map3.as_object().unwrap();
1452        assert_eq!(obj3.len(), 2);
1453        assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1454        assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1455    }
1456
1457    #[test]
1458    fn test_union_sparse_row_builder() {
1459        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1460        use arrow::buffer::ScalarBuffer;
1461        use arrow::datatypes::{DataType, Field, UnionFields};
1462        use std::sync::Arc;
1463
1464        // Create a sparse union array with mixed types (int, float, string)
1465        let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1466        let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1467        let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1468        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1469
1470        let union_fields = UnionFields::new(
1471            vec![0, 1, 2],
1472            vec![
1473                Field::new("int_field", DataType::Int32, false),
1474                Field::new("float_field", DataType::Float64, false),
1475                Field::new("string_field", DataType::Utf8, false),
1476            ],
1477        );
1478
1479        let children: Vec<Arc<dyn Array>> = vec![
1480            Arc::new(int_array),
1481            Arc::new(float_array),
1482            Arc::new(string_array),
1483        ];
1484
1485        let union_array = UnionArray::try_new(
1486            union_fields,
1487            type_ids,
1488            None, // Sparse union
1489            children,
1490        )
1491        .unwrap();
1492
1493        let variant_array = execute_row_builder_test(&union_array);
1494        assert_eq!(variant_array.value(0), Variant::Int32(1));
1495        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1496        assert_eq!(variant_array.value(2), Variant::from("hello"));
1497        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1498        assert_eq!(variant_array.value(4), Variant::Int32(34));
1499        assert!(variant_array.is_null(5));
1500    }
1501
1502    #[test]
1503    fn test_union_dense_row_builder() {
1504        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1505        use arrow::buffer::ScalarBuffer;
1506        use arrow::datatypes::{DataType, Field, UnionFields};
1507        use std::sync::Arc;
1508
1509        // Create a dense union array with mixed types (int, float, string)
1510        let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1511        let float_array = Float64Array::from(vec![3.2, 32.5]);
1512        let string_array = StringArray::from(vec!["hello"]);
1513        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1514        let offsets = [0, 0, 0, 1, 1, 2]
1515            .into_iter()
1516            .collect::<ScalarBuffer<i32>>();
1517
1518        let union_fields = UnionFields::new(
1519            vec![0, 1, 2],
1520            vec![
1521                Field::new("int_field", DataType::Int32, false),
1522                Field::new("float_field", DataType::Float64, false),
1523                Field::new("string_field", DataType::Utf8, false),
1524            ],
1525        );
1526
1527        let children: Vec<Arc<dyn Array>> = vec![
1528            Arc::new(int_array),
1529            Arc::new(float_array),
1530            Arc::new(string_array),
1531        ];
1532
1533        let union_array = UnionArray::try_new(
1534            union_fields,
1535            type_ids,
1536            Some(offsets), // Dense union
1537            children,
1538        )
1539        .unwrap();
1540
1541        // Test the row builder
1542        let options = CastOptions::default();
1543        let mut row_builder =
1544            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1545                .unwrap();
1546
1547        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1548        for i in 0..union_array.len() {
1549            row_builder.append_row(&mut variant_builder, i).unwrap();
1550        }
1551        let variant_array = variant_builder.build();
1552
1553        assert_eq!(variant_array.len(), 6);
1554        assert_eq!(variant_array.value(0), Variant::Int32(1));
1555        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1556        assert_eq!(variant_array.value(2), Variant::from("hello"));
1557        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1558        assert_eq!(variant_array.value(4), Variant::Int32(34));
1559        assert!(variant_array.is_null(5));
1560    }
1561
1562    #[test]
1563    fn test_union_sparse_type_ids_row_builder() {
1564        use arrow::array::{Int32Array, StringArray, UnionArray};
1565        use arrow::buffer::ScalarBuffer;
1566        use arrow::datatypes::{DataType, Field, UnionFields};
1567        use std::sync::Arc;
1568
1569        // Create a sparse union with non-contiguous type IDs (1, 3)
1570        let int_array = Int32Array::from(vec![Some(42), None]);
1571        let string_array = StringArray::from(vec![None, Some("test")]);
1572        let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1573
1574        let union_fields = UnionFields::new(
1575            vec![1, 3], // Non-contiguous type IDs
1576            vec![
1577                Field::new("int_field", DataType::Int32, false),
1578                Field::new("string_field", DataType::Utf8, false),
1579            ],
1580        );
1581
1582        let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1583
1584        let union_array = UnionArray::try_new(
1585            union_fields,
1586            type_ids,
1587            None, // Sparse union
1588            children,
1589        )
1590        .unwrap();
1591
1592        // Test the row builder
1593        let options = CastOptions::default();
1594        let mut row_builder =
1595            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1596                .unwrap();
1597
1598        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1599        for i in 0..union_array.len() {
1600            row_builder.append_row(&mut variant_builder, i).unwrap();
1601        }
1602        let variant_array = variant_builder.build();
1603
1604        // Verify results
1605        assert_eq!(variant_array.len(), 2);
1606
1607        // Row 0: int 42 (type_id = 1)
1608        assert_eq!(variant_array.value(0), Variant::Int32(42));
1609
1610        // Row 1: string "test" (type_id = 3)
1611        assert_eq!(variant_array.value(1), Variant::from("test"));
1612    }
1613
1614    #[test]
1615    fn test_decimal32_row_builder() {
1616        use arrow::array::Decimal32Array;
1617        use parquet_variant::VariantDecimal4;
1618
1619        // Test Decimal32Array with scale 2 (e.g., for currency: 12.34)
1620        let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1621            .with_precision_and_scale(9, 2)
1622            .unwrap();
1623
1624        test_row_builder_basic(
1625            &decimal_array,
1626            vec![
1627                Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1628                None,
1629                Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1630            ],
1631        );
1632    }
1633
1634    #[test]
1635    fn test_decimal128_row_builder() {
1636        use arrow::array::Decimal128Array;
1637        use parquet_variant::VariantDecimal16;
1638
1639        // Test Decimal128Array with negative scale (multiply by 10^|scale|)
1640        let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1641            .with_precision_and_scale(10, -2)
1642            .unwrap();
1643
1644        test_row_builder_basic(
1645            &decimal_array,
1646            vec![
1647                Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1648                None,
1649                Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1650            ],
1651        );
1652    }
1653
1654    #[test]
1655    fn test_decimal256_overflow_row_builder() {
1656        use arrow::array::Decimal256Array;
1657        use arrow::datatypes::i256;
1658
1659        // Test Decimal256Array with a value that overflows i128
1660        let large_value = i256::from_i128(i128::MAX) + i256::from(1); // Overflows i128
1661        let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1662            .with_precision_and_scale(76, 3)
1663            .unwrap();
1664
1665        test_row_builder_basic_with_options(
1666            &decimal_array,
1667            vec![
1668                Some(Variant::Null), // Overflow value becomes Variant::Null
1669                Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1670            ],
1671            CastOptions { strict: false },
1672        );
1673    }
1674
1675    #[test]
1676    fn test_binary_row_builder() {
1677        use arrow::array::BinaryArray;
1678
1679        let binary_data = vec![
1680            Some(b"hello".as_slice()),
1681            None,
1682            Some(b"\x00\x01\x02\xFF".as_slice()),
1683            Some(b"".as_slice()), // Empty binary
1684        ];
1685        let binary_array = BinaryArray::from(binary_data);
1686
1687        test_row_builder_basic(
1688            &binary_array,
1689            vec![
1690                Some(Variant::from(b"hello".as_slice())),
1691                None,
1692                Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1693                Some(Variant::from([].as_slice())),
1694            ],
1695        );
1696    }
1697
1698    #[test]
1699    fn test_binary_view_row_builder() {
1700        use arrow::array::BinaryViewArray;
1701
1702        let binary_data = vec![
1703            Some(b"short".as_slice()),
1704            None,
1705            Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1706        ];
1707        let binary_view_array = BinaryViewArray::from(binary_data);
1708
1709        test_row_builder_basic(
1710            &binary_view_array,
1711            vec![
1712                Some(Variant::from(b"short".as_slice())),
1713                None,
1714                Some(Variant::from(
1715                    b"this is a longer binary view that exceeds inline storage".as_slice(),
1716                )),
1717            ],
1718        );
1719    }
1720
1721    #[test]
1722    fn test_fixed_size_binary_row_builder() {
1723        use arrow::array::FixedSizeBinaryArray;
1724
1725        let binary_data = vec![
1726            Some([0x01, 0x02, 0x03, 0x04]),
1727            None,
1728            Some([0xFF, 0xFE, 0xFD, 0xFC]),
1729        ];
1730        let fixed_binary_array =
1731            FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1732                .unwrap();
1733
1734        test_row_builder_basic(
1735            &fixed_binary_array,
1736            vec![
1737                Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1738                None,
1739                Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1740            ],
1741        );
1742    }
1743
1744    #[test]
1745    fn test_utf8_view_row_builder() {
1746        use arrow::array::StringViewArray;
1747
1748        let string_data = vec![
1749            Some("short"),
1750            None,
1751            Some("this is a much longer string that will be stored out-of-line in the buffer"),
1752        ];
1753        let string_view_array = StringViewArray::from(string_data);
1754
1755        test_row_builder_basic(
1756            &string_view_array,
1757            vec![
1758                Some(Variant::from("short")),
1759                None,
1760                Some(Variant::from(
1761                    "this is a much longer string that will be stored out-of-line in the buffer",
1762                )),
1763            ],
1764        );
1765    }
1766
1767    #[test]
1768    fn test_timestamp_second_row_builder() {
1769        use arrow::array::TimestampSecondArray;
1770
1771        let timestamp_data = vec![
1772            Some(1609459200), // 2021-01-01 00:00:00 UTC
1773            None,
1774            Some(1640995200), // 2022-01-01 00:00:00 UTC
1775        ];
1776        let timestamp_array = TimestampSecondArray::from(timestamp_data);
1777
1778        let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1779        let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1780
1781        test_row_builder_basic(
1782            &timestamp_array,
1783            vec![
1784                Some(Variant::from(expected_naive1)),
1785                None,
1786                Some(Variant::from(expected_naive2)),
1787            ],
1788        );
1789    }
1790
1791    #[test]
1792    fn test_timestamp_with_timezone_row_builder() {
1793        use arrow::array::TimestampMicrosecondArray;
1794        use chrono::DateTime;
1795
1796        let timestamp_data = vec![
1797            Some(1609459200000000), // 2021-01-01 00:00:00 UTC (in microseconds)
1798            None,
1799            Some(1640995200000000), // 2022-01-01 00:00:00 UTC (in microseconds)
1800        ];
1801        let timezone = "UTC".to_string();
1802        let timestamp_array =
1803            TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1804
1805        let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1806        let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1807
1808        test_row_builder_basic(
1809            &timestamp_array,
1810            vec![
1811                Some(Variant::from(expected_utc1)),
1812                None,
1813                Some(Variant::from(expected_utc2)),
1814            ],
1815        );
1816    }
1817
1818    #[test]
1819    fn test_timestamp_nanosecond_precision_row_builder() {
1820        use arrow::array::TimestampNanosecondArray;
1821
1822        let timestamp_data = vec![
1823            Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
1824            None,
1825            Some(1609459200000000000), // 2021-01-01 00:00:00.000000000 UTC (no fractional seconds)
1826        ];
1827        let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1828
1829        let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1830            .unwrap()
1831            .naive_utc();
1832        let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1833
1834        test_row_builder_basic(
1835            &timestamp_array,
1836            vec![
1837                Some(Variant::from(expected_with_nanos)),
1838                None,
1839                Some(Variant::from(expected_no_nanos)),
1840            ],
1841        );
1842    }
1843
1844    #[test]
1845    fn test_timestamp_millisecond_row_builder() {
1846        use arrow::array::TimestampMillisecondArray;
1847
1848        let timestamp_data = vec![
1849            Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
1850            None,
1851            Some(1609459200000), // 2021-01-01 00:00:00.000 UTC
1852        ];
1853        let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1854
1855        let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1856            .unwrap()
1857            .naive_utc();
1858        let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1859
1860        test_row_builder_basic(
1861            &timestamp_array,
1862            vec![
1863                Some(Variant::from(expected_with_millis)),
1864                None,
1865                Some(Variant::from(expected_no_millis)),
1866            ],
1867        );
1868    }
1869
1870    #[test]
1871    fn test_date32_row_builder() {
1872        use arrow::array::Date32Array;
1873        use chrono::NaiveDate;
1874
1875        let date_data = vec![
1876            Some(0), // 1970-01-01
1877            None,
1878            Some(19723),   // 2024-01-01 (days since epoch)
1879            Some(-719162), // 0001-01-01 (near minimum)
1880        ];
1881        let date_array = Date32Array::from(date_data);
1882
1883        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1884        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1885        let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1886
1887        test_row_builder_basic(
1888            &date_array,
1889            vec![
1890                Some(Variant::from(expected_epoch)),
1891                None,
1892                Some(Variant::from(expected_2024)),
1893                Some(Variant::from(expected_min)),
1894            ],
1895        );
1896    }
1897
1898    #[test]
1899    fn test_date64_row_builder() {
1900        use arrow::array::Date64Array;
1901        use chrono::NaiveDate;
1902
1903        // Test Date64Array with various dates (milliseconds since epoch)
1904        let date_data = vec![
1905            Some(0), // 1970-01-01
1906            None,
1907            Some(1704067200000), // 2024-01-01 (milliseconds since epoch)
1908            Some(86400000),      // 1970-01-02
1909        ];
1910        let date_array = Date64Array::from(date_data);
1911
1912        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1913        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1914        let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1915
1916        test_row_builder_basic(
1917            &date_array,
1918            vec![
1919                Some(Variant::from(expected_epoch)),
1920                None,
1921                Some(Variant::from(expected_2024)),
1922                Some(Variant::from(expected_next_day)),
1923            ],
1924        );
1925    }
1926
1927    #[test]
1928    fn test_time32_second_row_builder() {
1929        use arrow::array::Time32SecondArray;
1930        use chrono::NaiveTime;
1931
1932        // Test Time32SecondArray with various times (seconds since midnight)
1933        let time_data = vec![
1934            Some(0), // 00:00:00
1935            None,
1936            Some(3661),  // 01:01:01
1937            Some(86399), // 23:59:59
1938        ];
1939        let time_array = Time32SecondArray::from(time_data);
1940
1941        let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1942        let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1943        let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1944
1945        test_row_builder_basic(
1946            &time_array,
1947            vec![
1948                Some(Variant::from(expected_midnight)),
1949                None,
1950                Some(Variant::from(expected_time)),
1951                Some(Variant::from(expected_last)),
1952            ],
1953        );
1954    }
1955
1956    #[test]
1957    fn test_time32_millisecond_row_builder() {
1958        use arrow::array::Time32MillisecondArray;
1959        use chrono::NaiveTime;
1960
1961        // Test Time32MillisecondArray with various times (milliseconds since midnight)
1962        let time_data = vec![
1963            Some(0), // 00:00:00.000
1964            None,
1965            Some(3661123),  // 01:01:01.123
1966            Some(86399999), // 23:59:59.999
1967        ];
1968        let time_array = Time32MillisecondArray::from(time_data);
1969
1970        let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
1971        let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
1972        let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
1973
1974        test_row_builder_basic(
1975            &time_array,
1976            vec![
1977                Some(Variant::from(expected_midnight)),
1978                None,
1979                Some(Variant::from(expected_time)),
1980                Some(Variant::from(expected_last)),
1981            ],
1982        );
1983    }
1984
1985    #[test]
1986    fn test_time64_microsecond_row_builder() {
1987        use arrow::array::Time64MicrosecondArray;
1988        use chrono::NaiveTime;
1989
1990        // Test Time64MicrosecondArray with various times (microseconds since midnight)
1991        let time_data = vec![
1992            Some(0), // 00:00:00.000000
1993            None,
1994            Some(3661123456),  // 01:01:01.123456
1995            Some(86399999999), // 23:59:59.999999
1996        ];
1997        let time_array = Time64MicrosecondArray::from(time_data);
1998
1999        let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
2000        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2001        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2002
2003        test_row_builder_basic(
2004            &time_array,
2005            vec![
2006                Some(Variant::from(expected_midnight)),
2007                None,
2008                Some(Variant::from(expected_time)),
2009                Some(Variant::from(expected_last)),
2010            ],
2011        );
2012    }
2013
2014    #[test]
2015    fn test_time64_nanosecond_row_builder() {
2016        use arrow::array::Time64NanosecondArray;
2017        use chrono::NaiveTime;
2018
2019        // Test Time64NanosecondArray with various times (nanoseconds since midnight)
2020        let time_data = vec![
2021            Some(0), // 00:00:00.000000000
2022            None,
2023            Some(3661123456789),  // 01:01:01.123456789
2024            Some(86399999999999), // 23:59:59.999999999
2025        ];
2026        let time_array = Time64NanosecondArray::from(time_data);
2027
2028        let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2029        // Nanoseconds are truncated to microsecond precision in Variant
2030        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2031        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2032
2033        test_row_builder_basic(
2034            &time_array,
2035            vec![
2036                Some(Variant::from(expected_midnight)),
2037                None,
2038                Some(Variant::from(expected_time)),
2039                Some(Variant::from(expected_last)),
2040            ],
2041        );
2042    }
2043}