parquet_variant_compute/
arrow_to_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{
19    Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
20    GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
21};
22use arrow::compute::{CastOptions, kernels::cast};
23use arrow::datatypes::{
24    self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
25    DecimalType, RunEndIndexType,
26};
27use arrow::temporal_conversions::{as_date, as_datetime, as_time};
28use arrow_schema::{ArrowError, DataType, TimeUnit};
29use chrono::{DateTime, TimeZone, Utc};
30use parquet_variant::{
31    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
32    VariantDecimal16, VariantDecimalType,
33};
34use std::collections::HashMap;
35use std::ops::Range;
36
37// ============================================================================
38// Row-oriented builders for efficient Arrow-to-Variant conversion
39// ============================================================================
40
/// Row builder for converting Arrow arrays to VariantArray row by row
///
/// Each variant wraps the concrete builder for one Arrow array type. Enum dispatch lets callers
/// hold a single value regardless of the input type, and every wrapped builder borrows its
/// source array for the lifetime `'a`.
pub(crate) enum ArrowToVariantRowBuilder<'a> {
    // Null and boolean
    Null(NullArrowToVariantBuilder),
    Boolean(BooleanArrowToVariantBuilder<'a>),
    // Signed integers
    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
    // Unsigned integers
    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
    // Floating point
    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
    // Decimals (Decimal256 has a dedicated builder because it is narrowed to i128 first)
    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
    // Timestamps, one variant per time unit
    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
    // Dates
    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
    // Times of day
    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
    // Binary data
    Binary(BinaryArrowToVariantBuilder<'a, i32>),
    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
    // Strings
    Utf8(StringArrowToVariantBuilder<'a, i32>),
    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
    Utf8View(StringViewArrowToVariantBuilder<'a>),
    // List-like types, all sharing the generic list builder
    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
    // Other nested and encoded types
    Struct(StructArrowToVariantBuilder<'a>),
    Map(MapArrowToVariantBuilder<'a>),
    Union(UnionArrowToVariantBuilder<'a>),
    Dictionary(DictionaryArrowToVariantBuilder<'a>),
    // Run-end encoded arrays, one variant per supported run-end index type
    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
}
90
91impl<'a> ArrowToVariantRowBuilder<'a> {
92    /// Appends a single row at the given index to the supplied builder.
93    pub fn append_row(
94        &mut self,
95        builder: &mut impl VariantBuilderExt,
96        index: usize,
97    ) -> Result<(), ArrowError> {
98        use ArrowToVariantRowBuilder::*;
99        match self {
100            Null(b) => b.append_row(builder, index),
101            Boolean(b) => b.append_row(builder, index),
102            PrimitiveInt8(b) => b.append_row(builder, index),
103            PrimitiveInt16(b) => b.append_row(builder, index),
104            PrimitiveInt32(b) => b.append_row(builder, index),
105            PrimitiveInt64(b) => b.append_row(builder, index),
106            PrimitiveUInt8(b) => b.append_row(builder, index),
107            PrimitiveUInt16(b) => b.append_row(builder, index),
108            PrimitiveUInt32(b) => b.append_row(builder, index),
109            PrimitiveUInt64(b) => b.append_row(builder, index),
110            PrimitiveFloat16(b) => b.append_row(builder, index),
111            PrimitiveFloat32(b) => b.append_row(builder, index),
112            PrimitiveFloat64(b) => b.append_row(builder, index),
113            Decimal32(b) => b.append_row(builder, index),
114            Decimal64(b) => b.append_row(builder, index),
115            Decimal128(b) => b.append_row(builder, index),
116            Decimal256(b) => b.append_row(builder, index),
117            TimestampSecond(b) => b.append_row(builder, index),
118            TimestampMillisecond(b) => b.append_row(builder, index),
119            TimestampMicrosecond(b) => b.append_row(builder, index),
120            TimestampNanosecond(b) => b.append_row(builder, index),
121            Date32(b) => b.append_row(builder, index),
122            Date64(b) => b.append_row(builder, index),
123            Time32Second(b) => b.append_row(builder, index),
124            Time32Millisecond(b) => b.append_row(builder, index),
125            Time64Microsecond(b) => b.append_row(builder, index),
126            Time64Nanosecond(b) => b.append_row(builder, index),
127            Binary(b) => b.append_row(builder, index),
128            LargeBinary(b) => b.append_row(builder, index),
129            BinaryView(b) => b.append_row(builder, index),
130            FixedSizeBinary(b) => b.append_row(builder, index),
131            Utf8(b) => b.append_row(builder, index),
132            LargeUtf8(b) => b.append_row(builder, index),
133            Utf8View(b) => b.append_row(builder, index),
134            List(b) => b.append_row(builder, index),
135            LargeList(b) => b.append_row(builder, index),
136            ListView(b) => b.append_row(builder, index),
137            LargeListView(b) => b.append_row(builder, index),
138            FixedSizeList(b) => b.append_row(builder, index),
139            Struct(b) => b.append_row(builder, index),
140            Map(b) => b.append_row(builder, index),
141            Union(b) => b.append_row(builder, index),
142            Dictionary(b) => b.append_row(builder, index),
143            RunEndEncodedInt16(b) => b.append_row(builder, index),
144            RunEndEncodedInt32(b) => b.append_row(builder, index),
145            RunEndEncodedInt64(b) => b.append_row(builder, index),
146        }
147    }
148}
149
150/// Factory function to create the appropriate row builder for a given DataType
151pub(crate) fn make_arrow_to_variant_row_builder<'a>(
152    data_type: &'a DataType,
153    array: &'a dyn Array,
154    options: &'a CastOptions,
155) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
156    use ArrowToVariantRowBuilder::*;
157    let builder =
158        match data_type {
159            DataType::Null => Null(NullArrowToVariantBuilder),
160            DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
161            DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
162            DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
163            DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
164            DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
165            DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
166            DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
167            DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
168            DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
169            DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
170            DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
171            DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
172            DataType::Decimal32(_, scale) => {
173                Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
174            }
175            DataType::Decimal64(_, scale) => {
176                Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
177            }
178            DataType::Decimal128(_, scale) => {
179                Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
180            }
181            DataType::Decimal256(_, scale) => {
182                Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
183            }
184            DataType::Timestamp(time_unit, time_zone) => {
185                match time_unit {
186                    TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
187                        array,
188                        options,
189                        time_zone.is_some(),
190                    )),
191                    TimeUnit::Millisecond => TimestampMillisecond(
192                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
193                    ),
194                    TimeUnit::Microsecond => TimestampMicrosecond(
195                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
196                    ),
197                    TimeUnit::Nanosecond => TimestampNanosecond(
198                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
199                    ),
200                }
201            }
202            DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
203            DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
204            DataType::Time32(time_unit) => match time_unit {
205                TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
206                TimeUnit::Millisecond => {
207                    Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
208                }
209                _ => {
210                    return Err(ArrowError::CastError(format!(
211                        "Unsupported Time32 unit: {time_unit:?}"
212                    )));
213                }
214            },
215            DataType::Time64(time_unit) => match time_unit {
216                TimeUnit::Microsecond => {
217                    Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
218                }
219                TimeUnit::Nanosecond => {
220                    Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
221                }
222                _ => {
223                    return Err(ArrowError::CastError(format!(
224                        "Unsupported Time64 unit: {time_unit:?}"
225                    )));
226                }
227            },
228            DataType::Duration(_) | DataType::Interval(_) => {
229                return Err(ArrowError::InvalidArgumentError(
230                    "Casting duration/interval types to Variant is not supported. \
231                    The Variant format does not define duration/interval types."
232                        .to_string(),
233                ));
234            }
235            DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
236            DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
237            DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
238            DataType::FixedSizeBinary(_) => {
239                FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
240            }
241            DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
242            DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
243            DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
244            DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
245            DataType::LargeList(_) => {
246                LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
247            }
248            DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
249                array.as_list_view(),
250                options,
251            )?),
252            DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
253                array.as_list_view(),
254                options,
255            )?),
256            DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
257                array.as_fixed_size_list(),
258                options,
259            )?),
260            DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
261                array.as_struct(),
262                options,
263            )?),
264            DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
265            DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
266            DataType::Dictionary(_, _) => {
267                Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
268            }
269            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
270                DataType::Int16 => {
271                    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
272                }
273                DataType::Int32 => {
274                    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
275                }
276                DataType::Int64 => {
277                    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
278                }
279                _ => {
280                    return Err(ArrowError::CastError(format!(
281                        "Unsupported run ends type: {}",
282                        run_ends.data_type()
283                    )));
284                }
285            },
286        };
287    Ok(builder)
288}
289
/// Macro to define (possibly generic) row builders with consistent structure and behavior.
///
/// The macro optionally allows defining a transform for values read from the underlying
/// array. Transforms of the form `|value| { ... }` are infallible (and should produce something
/// that implements `Into<Variant>`), while transforms of the form `|value| -> Option<_> { ... }`
/// are fallible (and should produce `Option<impl Into<Variant>>`); a failed transform will either
/// append null to the builder or return an error, depending on cast options.
///
/// Also supports optional extra fields that are passed to the constructor and which are available
/// by reference in the value transform. Providing a fallible value transform requires also
/// providing the extra field `options: &'a CastOptions`.
// TODO: If/when the macro_metavar_expr feature stabilizes, the `ignore` meta-function would allow
// us to "use" captured tokens without emitting them:
//
// ```
// $(
//     ${ignore($value)}
//     $(
//         ${ignore($option_ty)}
//         options: &$lifetime CastOptions,
//     )?
// )?
// ```
//
// That, in turn, would allow us to inject the `options` field whenever the user specifies a
// fallible value transform, instead of requiring them to manually define it. This might not be
// worth the trouble, though, because it makes for some pretty bulky and unwieldy macro expansions.
macro_rules! define_row_builder {
    (
        // Struct name, its lifetime, and any generic parameters (each with at most one bound)
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        // Optional where clause holding a single predicate
        $( where $where_path:path: $where_bound:path $(,)? )?
        // Optional extra fields, stored in the struct and accepted by `new`
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        // Downcast from `&dyn Array` to the concrete array type
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        // Optional value transform; the `-> Option<_>` form marks it as fallible
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
            _phantom: std::marker::PhantomData<($( $generic, )*)>, // capture all type params
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            // Downcasts the array via the caller-supplied expression and stores any extra fields.
            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            // Appends the (optionally transformed) value at `index`, or null if the slot is null.
            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
                if self.array.is_null(index) {
                    builder.append_null();
                } else {
                    // Macro hygiene: Give any extra fields names the value transform can access.
                    //
                    // The value transform doesn't normally reference cast options, but the macro's
                    // caller still has to declare the field because stable rust has no way to "use"
                    // a captured token without emitting it. So, silence unused variable warnings,
                    // assuming that's the `options` field. Unfortunately, that also silences
                    // legitimate compiler warnings if an infallible value transform fails to use
                    // its first extra field.
                    $(
                        #[allow(unused)]
                        $( let $field = &self.$field; )+
                    )?

                    // Apply the value transform, if any (with name swapping for hygiene)
                    let value = self.array.value(index);
                    $(
                        let $value = value;
                        let value = $value_transform;
                        $(
                            // NOTE: The `?` macro expansion fails without the type annotation.
                            let Some(value): Option<$option_ty> = value else {
                                // Strict mode (`safe == false`) reports the failure as an error.
                                if !self.options.safe {
                                    return Err(ArrowError::ComputeError(format!(
                                        "Failed to convert value at index {index}: conversion failed",
                                    )));
                                } else {
                                    // Overflow is encoded as Variant::Null,
                                    // distinct from None indicating a missing value
                                    builder.append_value(Variant::Null);
                                    return Ok(());
                                }
                            };
                        )?
                    )?
                    builder.append_value(value);
                }
                Ok(())
            }
        }
    };
}
389
// Row builder for boolean arrays. No value transform: each non-null value is appended as-is.
define_row_builder!(
    struct BooleanArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BooleanArray { array.as_boolean() }
);
394
// Row builder for any primitive array whose native type converts directly into `Variant`.
// No value transform: each non-null native value is appended as-is.
define_row_builder!(
    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
    where T::Native: Into<Variant<'a, 'a>>,
    |array| -> PrimitiveArray<T> { array.as_primitive() }
);
400
// Row builder for decimal arrays whose native type matches the target Variant decimal `V`.
// The transform is fallible: a value/scale pair rejected by `V::try_new_with_signed_scale` is
// handled according to `options` (error, or `Variant::Null`).
define_row_builder!(
    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
    where
        V: VariantDecimalType<Native = A::Native>,
    {
        options: &'a CastOptions<'a>,
        scale: i8,
    },
    |array| -> PrimitiveArray<A> { array.as_primitive() },
    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
);
412
413// Decimal256 needs a two-stage conversion via i128
414define_row_builder!(
415    struct Decimal256ArrowToVariantBuilder<'a> {
416        options: &'a CastOptions<'a>,
417        scale: i8,
418    },
419    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
420    |value| -> Option<_> {
421        let value = value.to_i128();
422        value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
423    }
424);
425
426define_row_builder!(
427    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
428        options: &'a CastOptions<'a>,
429        has_time_zone: bool,
430    },
431    |array| -> PrimitiveArray<T> { array.as_primitive() },
432    |value| -> Option<_> {
433        // Convert using Arrow's temporal conversion functions
434        as_datetime::<T>(value).map(|naive_datetime| {
435            if *has_time_zone {
436                // Has timezone -> DateTime<Utc> -> TimestampMicros/TimestampNanos
437                let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
438                Variant::from(utc_dt) // Uses From<DateTime<Utc>> for Variant
439            } else {
440                // No timezone -> NaiveDateTime -> TimestampNtzMicros/TimestampNtzNanos
441                Variant::from(naive_datetime) // Uses From<NaiveDateTime> for Variant
442            }
443        })
444    }
445);
446
447define_row_builder!(
448    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
449    where
450        i64: From<T::Native>,
451    {
452        options: &'a CastOptions<'a>,
453    },
454    |array| -> PrimitiveArray<T> { array.as_primitive() },
455    |value| -> Option<_> {
456        let date_value = i64::from(value);
457        as_date::<T>(date_value)
458    }
459);
460
461define_row_builder!(
462    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
463    where
464        i64: From<T::Native>,
465    {
466        options: &'a CastOptions<'a>,
467    },
468    |array| -> PrimitiveArray<T> { array.as_primitive() },
469    |value| -> Option<_> {
470        let time_value = i64::from(value);
471        as_time::<T>(time_value)
472    }
473);
474
// Row builder for offset-based binary arrays; `O` selects the offset width (i32 or i64).
define_row_builder!(
    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericBinaryArray<O> { array.as_binary() }
);

// Row builder for binary view arrays.
define_row_builder!(
    struct BinaryViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
);

// Row builder for fixed-size binary arrays.
define_row_builder!(
    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
);
489
// Row builder for offset-based string arrays; `O` selects the offset width (i32 or i64).
define_row_builder!(
    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
    |array| -> GenericStringArray<O> { array.as_string() }
);

// Row builder for string view arrays.
define_row_builder!(
    struct StringViewArrowToVariantBuilder<'a>,
    |array| -> arrow::array::StringViewArray { array.as_string_view() }
);
499
/// Null builder that always appends null
///
/// It holds no reference to the source array because every row produces the same output.
pub(crate) struct NullArrowToVariantBuilder;

impl NullArrowToVariantBuilder {
    /// Appends a null to `builder`; the row index is ignored. Always returns `Ok(())`.
    fn append_row(
        &mut self,
        builder: &mut impl VariantBuilderExt,
        _index: usize,
    ) -> Result<(), ArrowError> {
        builder.append_null();
        Ok(())
    }
}
513
514/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
515/// and FixedSizeList
516pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
517    list_array: &'a L,
518    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
519}
520
521impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
522    pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
523        let values = array.values();
524        let values_builder =
525            make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
526
527        Ok(Self {
528            list_array: array,
529            values_builder: Box::new(values_builder),
530        })
531    }
532
533    fn append_row(
534        &mut self,
535        builder: &mut impl VariantBuilderExt,
536        index: usize,
537    ) -> Result<(), ArrowError> {
538        if self.list_array.is_null(index) {
539            builder.append_null();
540            return Ok(());
541        }
542
543        let range = self.list_array.element_range(index);
544
545        let mut list_builder = builder.try_new_list()?;
546        for value_index in range {
547            self.values_builder
548                .append_row(&mut list_builder, value_index)?;
549        }
550        list_builder.finish();
551        Ok(())
552    }
553}
554
/// Trait for list-like arrays that can provide element ranges
///
/// It abstracts over the different list layouts so that a single generic list builder can
/// iterate the child values belonging to one list row.
pub(crate) trait ListLikeArray: Array {
    /// Get the values array
    fn values(&self) -> &ArrayRef;

    /// Get the start and end indices for a list element
    ///
    /// The returned half-open range indexes into the array returned by `values`.
    fn element_range(&self, index: usize) -> Range<usize>;
}
563
564impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
565    fn values(&self) -> &ArrayRef {
566        self.values()
567    }
568
569    fn element_range(&self, index: usize) -> Range<usize> {
570        let offsets = self.offsets();
571        let start = offsets[index].as_usize();
572        let end = offsets[index + 1].as_usize();
573        start..end
574    }
575}
576
577impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
578    fn values(&self) -> &ArrayRef {
579        self.values()
580    }
581
582    fn element_range(&self, index: usize) -> Range<usize> {
583        let offsets = self.value_offsets();
584        let sizes = self.value_sizes();
585        let offset = offsets[index].as_usize();
586        let size = sizes[index].as_usize();
587        offset..(offset + size)
588    }
589}
590
591impl ListLikeArray for FixedSizeListArray {
592    fn values(&self) -> &ArrayRef {
593        self.values()
594    }
595
596    fn element_range(&self, index: usize) -> Range<usize> {
597        let value_length = self.value_length().as_usize();
598        let offset = index * value_length;
599        offset..(offset + value_length)
600    }
601}
602
603/// Struct builder for StructArray
604pub(crate) struct StructArrowToVariantBuilder<'a> {
605    struct_array: &'a arrow::array::StructArray,
606    field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
607}
608
609impl<'a> StructArrowToVariantBuilder<'a> {
610    pub(crate) fn new(
611        struct_array: &'a arrow::array::StructArray,
612        options: &'a CastOptions,
613    ) -> Result<Self, ArrowError> {
614        let mut field_builders = Vec::new();
615
616        // Create a row builder for each field
617        for (field_name, field_array) in struct_array
618            .column_names()
619            .iter()
620            .zip(struct_array.columns().iter())
621        {
622            let field_builder = make_arrow_to_variant_row_builder(
623                field_array.data_type(),
624                field_array.as_ref(),
625                options,
626            )?;
627            field_builders.push((*field_name, field_builder));
628        }
629
630        Ok(Self {
631            struct_array,
632            field_builders,
633        })
634    }
635
636    fn append_row(
637        &mut self,
638        builder: &mut impl VariantBuilderExt,
639        index: usize,
640    ) -> Result<(), ArrowError> {
641        if self.struct_array.is_null(index) {
642            builder.append_null();
643        } else {
644            // Create object builder for this struct row
645            let mut obj_builder = builder.try_new_object()?;
646
647            // Process each field
648            for (field_name, row_builder) in &mut self.field_builders {
649                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
650                row_builder.append_row(&mut field_builder, index)?;
651            }
652
653            obj_builder.finish();
654        }
655        Ok(())
656    }
657}
658
659/// Map builder for MapArray types
660pub(crate) struct MapArrowToVariantBuilder<'a> {
661    map_array: &'a arrow::array::MapArray,
662    key_strings: arrow::array::StringArray,
663    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
664}
665
666impl<'a> MapArrowToVariantBuilder<'a> {
667    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
668        let map_array = array.as_map();
669
670        // Pre-cast keys to strings once
671        let keys = cast(map_array.keys(), &DataType::Utf8)?;
672        let key_strings = keys.as_string::<i32>().clone();
673
674        // Create recursive builder for values
675        let values = map_array.values();
676        let values_builder =
677            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
678
679        Ok(Self {
680            map_array,
681            key_strings,
682            values_builder: Box::new(values_builder),
683        })
684    }
685
686    fn append_row(
687        &mut self,
688        builder: &mut impl VariantBuilderExt,
689        index: usize,
690    ) -> Result<(), ArrowError> {
691        // Check for NULL map first (via null bitmap)
692        if self.map_array.is_null(index) {
693            builder.append_null();
694            return Ok(());
695        }
696
697        let offsets = self.map_array.offsets();
698        let start = offsets[index].as_usize();
699        let end = offsets[index + 1].as_usize();
700
701        // Create object builder for this map
702        let mut object_builder = builder.try_new_object()?;
703
704        // Add each key-value pair (loop does nothing for empty maps - correct!)
705        for kv_index in start..end {
706            let key = self.key_strings.value(kv_index);
707            let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
708            self.values_builder
709                .append_row(&mut field_builder, kv_index)?;
710        }
711
712        object_builder.finish();
713        Ok(())
714    }
715}
716
717/// Union builder for both sparse and dense union arrays
718///
719/// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders.
720pub(crate) struct UnionArrowToVariantBuilder<'a> {
721    union_array: &'a arrow::array::UnionArray,
722    child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
723}
724
725impl<'a> UnionArrowToVariantBuilder<'a> {
726    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
727        let union_array = array.as_union();
728        let type_ids = union_array.type_ids();
729
730        // Create child builders for each union field
731        let mut child_builders = HashMap::new();
732        for &type_id in type_ids {
733            let child_array = union_array.child(type_id);
734            let child_builder = make_arrow_to_variant_row_builder(
735                child_array.data_type(),
736                child_array.as_ref(),
737                options,
738            )?;
739            child_builders.insert(type_id, Box::new(child_builder));
740        }
741
742        Ok(Self {
743            union_array,
744            child_builders,
745        })
746    }
747
748    fn append_row(
749        &mut self,
750        builder: &mut impl VariantBuilderExt,
751        index: usize,
752    ) -> Result<(), ArrowError> {
753        let type_id = self.union_array.type_id(index);
754        let value_offset = self.union_array.value_offset(index);
755
756        // Delegate to the appropriate child builder, or append null to handle an invalid type_id
757        match self.child_builders.get_mut(&type_id) {
758            Some(child_builder) => child_builder.append_row(builder, value_offset)?,
759            None => builder.append_null(),
760        }
761
762        Ok(())
763    }
764}
765
766/// Dictionary array builder with simple O(1) indexing
767pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
768    keys: &'a dyn Array, // only needed for null checks
769    normalized_keys: Vec<usize>,
770    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
771}
772
773impl<'a> DictionaryArrowToVariantBuilder<'a> {
774    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
775        let dict_array = array.as_any_dictionary();
776        let values = dict_array.values();
777        let values_builder =
778            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
779
780        // WARNING: normalized_keys panics if values is empty
781        let normalized_keys = match values.len() {
782            0 => Vec::new(),
783            _ => dict_array.normalized_keys(),
784        };
785
786        Ok(Self {
787            keys: dict_array.keys(),
788            normalized_keys,
789            values_builder: Box::new(values_builder),
790        })
791    }
792
793    fn append_row(
794        &mut self,
795        builder: &mut impl VariantBuilderExt,
796        index: usize,
797    ) -> Result<(), ArrowError> {
798        if self.keys.is_null(index) {
799            builder.append_null();
800        } else {
801            let normalized_key = self.normalized_keys[index];
802            self.values_builder.append_row(builder, normalized_key)?;
803        }
804        Ok(())
805    }
806}
807
808/// Run-end encoded array builder with efficient sequential access
809pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
810    run_array: &'a arrow::array::RunArray<R>,
811    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
812
813    run_ends: &'a [R::Native],
814    run_number: usize, // Physical index into run_ends and values
815    run_start: usize,  // Logical start index of current run
816}
817
818impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
819    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
820        let Some(run_array) = array.as_run_opt() else {
821            return Err(ArrowError::CastError("Expected RunArray".to_string()));
822        };
823
824        let values = run_array.values();
825        let values_builder =
826            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
827
828        Ok(Self {
829            run_array,
830            values_builder: Box::new(values_builder),
831            run_ends: run_array.run_ends().values(),
832            run_number: 0,
833            run_start: 0,
834        })
835    }
836
837    fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
838        if index >= self.run_start {
839            let Some(run_end) = self.run_ends.get(self.run_number) else {
840                return Err(ArrowError::CastError(format!(
841                    "Index {index} beyond run array"
842                )));
843            };
844            if index < run_end.as_usize() {
845                return Ok(());
846            }
847            if index == run_end.as_usize() {
848                self.run_number += 1;
849                self.run_start = run_end.as_usize();
850                return Ok(());
851            }
852        }
853
854        // Use partition_point for all non-sequential cases
855        let run_number = self
856            .run_ends
857            .partition_point(|&run_end| run_end.as_usize() <= index);
858        if run_number >= self.run_ends.len() {
859            return Err(ArrowError::CastError(format!(
860                "Index {index} beyond run array"
861            )));
862        }
863        self.run_number = run_number;
864        self.run_start = match run_number {
865            0 => 0,
866            _ => self.run_ends[run_number - 1].as_usize(),
867        };
868        Ok(())
869    }
870
871    fn append_row(
872        &mut self,
873        builder: &mut impl VariantBuilderExt,
874        index: usize,
875    ) -> Result<(), ArrowError> {
876        self.set_run_for_index(index)?;
877
878        // Handle null values
879        if self.run_array.values().is_null(self.run_number) {
880            builder.append_null();
881            return Ok(());
882        }
883
884        // Re-encode the value
885        self.values_builder.append_row(builder, self.run_number)?;
886
887        Ok(())
888    }
889}
890
891#[cfg(test)]
892mod tests {
893    use super::*;
894    use crate::{VariantArray, VariantArrayBuilder};
895    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
896    use arrow::datatypes::Int32Type;
897    use std::sync::Arc;
898
899    /// Builds a VariantArray from an Arrow array using the row builder.
900    fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
901        execute_row_builder_test_with_options(
902            array,
903            CastOptions {
904                safe: false,
905                ..Default::default()
906            },
907        )
908    }
909
910    /// Variant of `execute_row_builder_test` that allows specifying options
911    fn execute_row_builder_test_with_options(
912        array: &dyn Array,
913        options: CastOptions,
914    ) -> VariantArray {
915        let mut row_builder =
916            make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
917
918        let mut array_builder = VariantArrayBuilder::new(array.len());
919
920        // The repetitive loop that appears in every test
921        for i in 0..array.len() {
922            row_builder.append_row(&mut array_builder, i).unwrap();
923        }
924
925        let variant_array = array_builder.build();
926        assert_eq!(variant_array.len(), array.len());
927        variant_array
928    }
929
930    /// Generic helper function to test row builders with basic assertion patterns.
931    /// Uses execute_row_builder_test and adds simple value comparison assertions.
932    fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
933        test_row_builder_basic_with_options(
934            array,
935            expected_values,
936            CastOptions {
937                safe: false,
938                ..Default::default()
939            },
940        );
941    }
942
943    /// Variant of `test_row_builder_basic` that allows specifying options
944    fn test_row_builder_basic_with_options(
945        array: &dyn Array,
946        expected_values: Vec<Option<Variant>>,
947        options: CastOptions,
948    ) {
949        let variant_array = execute_row_builder_test_with_options(array, options);
950
951        // The repetitive assertion pattern
952        for (i, expected) in expected_values.iter().enumerate() {
953            match expected {
954                Some(variant) => {
955                    assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
956                }
957                None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
958            }
959        }
960    }
961
962    #[test]
963    fn test_primitive_row_builder() {
964        let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
965        test_row_builder_basic(
966            &int_array,
967            vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
968        );
969    }
970
971    #[test]
972    fn test_string_row_builder() {
973        let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
974        test_row_builder_basic(
975            &string_array,
976            vec![
977                Some(Variant::from("hello")),
978                None,
979                Some(Variant::from("world")),
980            ],
981        );
982    }
983
984    #[test]
985    fn test_boolean_row_builder() {
986        let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
987        test_row_builder_basic(
988            &bool_array,
989            vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
990        );
991    }
992
993    #[test]
994    fn test_struct_row_builder() {
995        use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
996        use arrow_schema::{DataType, Field};
997        use std::sync::Arc;
998
999        // Create a struct array with int and string fields
1000        let int_field = Field::new("id", DataType::Int32, true);
1001        let string_field = Field::new("name", DataType::Utf8, true);
1002
1003        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1004        let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
1005
1006        let struct_array = StructArray::try_new(
1007            vec![int_field, string_field].into(),
1008            vec![
1009                Arc::new(int_array) as ArrayRef,
1010                Arc::new(string_array) as ArrayRef,
1011            ],
1012            None,
1013        )
1014        .unwrap();
1015
1016        let variant_array = execute_row_builder_test(&struct_array);
1017
1018        // Check first row - should have both fields
1019        let first_variant = variant_array.value(0);
1020        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1021        assert_eq!(
1022            first_variant.get_object_field("name"),
1023            Some(Variant::from("Alice"))
1024        );
1025
1026        // Check second row - should have name field but not id (null field omitted)
1027        let second_variant = variant_array.value(1);
1028        assert_eq!(second_variant.get_object_field("id"), None); // null field omitted
1029        assert_eq!(
1030            second_variant.get_object_field("name"),
1031            Some(Variant::from("Bob"))
1032        );
1033
1034        // Check third row - should have id field but not name (null field omitted)
1035        let third_variant = variant_array.value(2);
1036        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
1037        assert_eq!(third_variant.get_object_field("name"), None); // null field omitted
1038    }
1039
1040    #[test]
1041    fn test_run_end_encoded_row_builder() {
1042        use arrow::array::{Int32Array, RunArray};
1043        use arrow::datatypes::Int32Type;
1044
1045        // Create a run-end encoded array: [A, A, B, B, B, C]
1046        // run_ends: [2, 5, 6]
1047        // values: ["A", "B", "C"]
1048        let values = StringArray::from(vec!["A", "B", "C"]);
1049        let run_ends = Int32Array::from(vec![2, 5, 6]);
1050        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1051
1052        let variant_array = execute_row_builder_test(&run_array);
1053
1054        // Verify the values
1055        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1056        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1057        assert_eq!(variant_array.value(2), Variant::from("B")); // Run 1
1058        assert_eq!(variant_array.value(3), Variant::from("B")); // Run 1
1059        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 1
1060        assert_eq!(variant_array.value(5), Variant::from("C")); // Run 2
1061    }
1062
1063    #[test]
1064    fn test_run_end_encoded_random_access() {
1065        use arrow::array::{Int32Array, RunArray};
1066        use arrow::datatypes::Int32Type;
1067
1068        // Create a run-end encoded array: [A, A, B, B, B, C]
1069        let values = StringArray::from(vec!["A", "B", "C"]);
1070        let run_ends = Int32Array::from(vec![2, 5, 6]);
1071        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1072
1073        let options = CastOptions {
1074            safe: false,
1075            ..Default::default()
1076        };
1077        let mut row_builder =
1078            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1079
1080        // Test random access pattern (backward jumps, forward jumps)
1081        let access_pattern = [0, 5, 2, 4, 1, 3]; // Mix of all cases
1082        let expected_values = ["A", "C", "B", "B", "A", "B"];
1083
1084        for (i, &index) in access_pattern.iter().enumerate() {
1085            let mut array_builder = VariantArrayBuilder::new(1);
1086            row_builder.append_row(&mut array_builder, index).unwrap();
1087            let variant_array = array_builder.build();
1088            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1089        }
1090    }
1091
1092    #[test]
1093    fn test_run_end_encoded_with_nulls() {
1094        use arrow::array::{Int32Array, RunArray};
1095        use arrow::datatypes::Int32Type;
1096
1097        // Create a run-end encoded array with null values: [A, A, null, null, B]
1098        let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1099        let run_ends = Int32Array::from(vec![2, 4, 5]);
1100        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1101
1102        let options = CastOptions {
1103            safe: false,
1104            ..Default::default()
1105        };
1106        let mut row_builder =
1107            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1108        let mut array_builder = VariantArrayBuilder::new(5);
1109
1110        // Test sequential access
1111        for i in 0..5 {
1112            row_builder.append_row(&mut array_builder, i).unwrap();
1113        }
1114
1115        let variant_array = array_builder.build();
1116        assert_eq!(variant_array.len(), 5);
1117
1118        // Verify the values
1119        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1120        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1121        assert!(variant_array.is_null(2)); // Run 1 (null)
1122        assert!(variant_array.is_null(3)); // Run 1 (null)
1123        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 2
1124    }
1125
1126    #[test]
1127    fn test_dictionary_row_builder() {
1128        use arrow::array::{DictionaryArray, Int32Array};
1129        use arrow::datatypes::Int32Type;
1130
1131        // Create a dictionary array: keys=[0, 1, 0, 2, 1], values=["apple", "banana", "cherry"]
1132        let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1133        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1134        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1135
1136        let variant_array = execute_row_builder_test(&dict_array);
1137
1138        // Verify the values match the dictionary lookup
1139        assert_eq!(variant_array.value(0), Variant::from("apple")); // keys[0] = 0 -> values[0] = "apple"
1140        assert_eq!(variant_array.value(1), Variant::from("banana")); // keys[1] = 1 -> values[1] = "banana"
1141        assert_eq!(variant_array.value(2), Variant::from("apple")); // keys[2] = 0 -> values[0] = "apple"
1142        assert_eq!(variant_array.value(3), Variant::from("cherry")); // keys[3] = 2 -> values[2] = "cherry"
1143        assert_eq!(variant_array.value(4), Variant::from("banana")); // keys[4] = 1 -> values[1] = "banana"
1144    }
1145
1146    #[test]
1147    fn test_dictionary_with_nulls() {
1148        use arrow::array::{DictionaryArray, Int32Array};
1149        use arrow::datatypes::Int32Type;
1150
1151        // Create a dictionary array with null keys: keys=[0, null, 1, null, 2], values=["x", "y", "z"]
1152        let values = StringArray::from(vec!["x", "y", "z"]);
1153        let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1154        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1155
1156        let options = CastOptions {
1157            safe: false,
1158            ..Default::default()
1159        };
1160        let mut row_builder =
1161            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1162                .unwrap();
1163        let mut array_builder = VariantArrayBuilder::new(5);
1164
1165        // Test sequential access
1166        for i in 0..5 {
1167            row_builder.append_row(&mut array_builder, i).unwrap();
1168        }
1169
1170        let variant_array = array_builder.build();
1171        assert_eq!(variant_array.len(), 5);
1172
1173        // Verify the values and nulls
1174        assert_eq!(variant_array.value(0), Variant::from("x")); // keys[0] = 0 -> values[0] = "x"
1175        assert!(variant_array.is_null(1)); // keys[1] = null
1176        assert_eq!(variant_array.value(2), Variant::from("y")); // keys[2] = 1 -> values[1] = "y"
1177        assert!(variant_array.is_null(3)); // keys[3] = null
1178        assert_eq!(variant_array.value(4), Variant::from("z")); // keys[4] = 2 -> values[2] = "z"
1179    }
1180
1181    #[test]
1182    fn test_dictionary_random_access() {
1183        use arrow::array::{DictionaryArray, Int32Array};
1184        use arrow::datatypes::Int32Type;
1185
1186        // Create a dictionary array: keys=[0, 1, 2, 0, 1, 2], values=["red", "green", "blue"]
1187        let values = StringArray::from(vec!["red", "green", "blue"]);
1188        let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1189        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1190
1191        let options = CastOptions {
1192            safe: false,
1193            ..Default::default()
1194        };
1195        let mut row_builder =
1196            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1197                .unwrap();
1198
1199        // Test random access pattern
1200        let access_pattern = [5, 0, 3, 1, 4, 2]; // Random order
1201        let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1202
1203        for (i, &index) in access_pattern.iter().enumerate() {
1204            let mut array_builder = VariantArrayBuilder::new(1);
1205            row_builder.append_row(&mut array_builder, index).unwrap();
1206            let variant_array = array_builder.build();
1207            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1208        }
1209    }
1210
1211    #[test]
1212    fn test_nested_dictionary() {
1213        use arrow::array::{DictionaryArray, Int32Array, StructArray};
1214        use arrow::datatypes::{Field, Int32Type};
1215
1216        // Create a dictionary with struct values
1217        let id_array = Int32Array::from(vec![1, 2, 3]);
1218        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1219        let struct_array = StructArray::from(vec![
1220            (
1221                Arc::new(Field::new("id", DataType::Int32, false)),
1222                Arc::new(id_array) as ArrayRef,
1223            ),
1224            (
1225                Arc::new(Field::new("name", DataType::Utf8, false)),
1226                Arc::new(name_array) as ArrayRef,
1227            ),
1228        ]);
1229
1230        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1231        let dict_array =
1232            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1233
1234        let options = CastOptions {
1235            safe: false,
1236            ..Default::default()
1237        };
1238        let mut row_builder =
1239            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1240                .unwrap();
1241        let mut array_builder = VariantArrayBuilder::new(5);
1242
1243        // Test sequential access
1244        for i in 0..5 {
1245            row_builder.append_row(&mut array_builder, i).unwrap();
1246        }
1247
1248        let variant_array = array_builder.build();
1249        assert_eq!(variant_array.len(), 5);
1250
1251        // Verify the nested struct values
1252        let first_variant = variant_array.value(0);
1253        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1254        assert_eq!(
1255            first_variant.get_object_field("name"),
1256            Some(Variant::from("Alice"))
1257        );
1258
1259        let second_variant = variant_array.value(1);
1260        assert_eq!(
1261            second_variant.get_object_field("id"),
1262            Some(Variant::from(2))
1263        );
1264        assert_eq!(
1265            second_variant.get_object_field("name"),
1266            Some(Variant::from("Bob"))
1267        );
1268
1269        // Test that repeated keys give same values
1270        let third_variant = variant_array.value(2);
1271        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1272        assert_eq!(
1273            third_variant.get_object_field("name"),
1274            Some(Variant::from("Alice"))
1275        );
1276    }
1277
1278    #[test]
1279    fn test_list_row_builder() {
1280        use arrow::array::ListArray;
1281
1282        // Create a list array: [[1, 2], [3, 4, 5], null, []]
1283        let data = vec![
1284            Some(vec![Some(1), Some(2)]),
1285            Some(vec![Some(3), Some(4), Some(5)]),
1286            None,
1287            Some(vec![]),
1288        ];
1289        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1290
1291        let variant_array = execute_row_builder_test(&list_array);
1292
1293        // Row 0: [1, 2]
1294        let row0 = variant_array.value(0);
1295        let list0 = row0.as_list().unwrap();
1296        assert_eq!(list0.len(), 2);
1297        assert_eq!(list0.get(0), Some(Variant::from(1)));
1298        assert_eq!(list0.get(1), Some(Variant::from(2)));
1299
1300        // Row 1: [3, 4, 5]
1301        let row1 = variant_array.value(1);
1302        let list1 = row1.as_list().unwrap();
1303        assert_eq!(list1.len(), 3);
1304        assert_eq!(list1.get(0), Some(Variant::from(3)));
1305        assert_eq!(list1.get(1), Some(Variant::from(4)));
1306        assert_eq!(list1.get(2), Some(Variant::from(5)));
1307
1308        // Row 2: null
1309        assert!(variant_array.is_null(2));
1310
1311        // Row 3: []
1312        let row3 = variant_array.value(3);
1313        let list3 = row3.as_list().unwrap();
1314        assert_eq!(list3.len(), 0);
1315    }
1316
1317    #[test]
1318    fn test_sliced_list_row_builder() {
1319        use arrow::array::ListArray;
1320
1321        // Create a list array: [[1, 2], [3, 4, 5], [6]]
1322        let data = vec![
1323            Some(vec![Some(1), Some(2)]),
1324            Some(vec![Some(3), Some(4), Some(5)]),
1325            Some(vec![Some(6)]),
1326        ];
1327        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1328
1329        // Slice to get just the middle element: [[3, 4, 5]]
1330        let sliced_array = list_array.slice(1, 1);
1331
1332        let options = CastOptions {
1333            safe: false,
1334            ..Default::default()
1335        };
1336        let mut row_builder =
1337            make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1338                .unwrap();
1339        let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1340
1341        // Test the single row
1342        row_builder
1343            .append_row(&mut variant_array_builder, 0)
1344            .unwrap();
1345        let variant_array = variant_array_builder.build();
1346
1347        // Verify result
1348        assert_eq!(variant_array.len(), 1);
1349
1350        // Row 0: [3, 4, 5]
1351        let row0 = variant_array.value(0);
1352        let list0 = row0.as_list().unwrap();
1353        assert_eq!(list0.len(), 3);
1354        assert_eq!(list0.get(0), Some(Variant::from(3)));
1355        assert_eq!(list0.get(1), Some(Variant::from(4)));
1356        assert_eq!(list0.get(2), Some(Variant::from(5)));
1357    }
1358
1359    #[test]
1360    fn test_nested_list_row_builder() {
1361        use arrow::array::ListArray;
1362        use arrow::datatypes::Field;
1363
1364        // Build the nested structure manually
1365        let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1366        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1367
1368        let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1369        let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1370
1371        let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1372        let outer_list = ListArray::new(
1373            inner_list_field,
1374            outer_offsets,
1375            Arc::new(values_list),
1376            Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1377        );
1378
1379        let options = CastOptions {
1380            safe: false,
1381            ..Default::default()
1382        };
1383        let mut row_builder =
1384            make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1385                .unwrap();
1386        let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1387
1388        for i in 0..outer_list.len() {
1389            row_builder
1390                .append_row(&mut variant_array_builder, i)
1391                .unwrap();
1392        }
1393
1394        let variant_array = variant_array_builder.build();
1395
1396        // Verify results
1397        assert_eq!(variant_array.len(), 2);
1398
1399        // Row 0: [[1, 2], [3]]
1400        let row0 = variant_array.value(0);
1401        let outer_list0 = row0.as_list().unwrap();
1402        assert_eq!(outer_list0.len(), 2);
1403
1404        let inner_list0_0 = outer_list0.get(0).unwrap();
1405        let inner_list0_0 = inner_list0_0.as_list().unwrap();
1406        assert_eq!(inner_list0_0.len(), 2);
1407        assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1408        assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1409
1410        let inner_list0_1 = outer_list0.get(1).unwrap();
1411        let inner_list0_1 = inner_list0_1.as_list().unwrap();
1412        assert_eq!(inner_list0_1.len(), 1);
1413        assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1414
1415        // Row 1: null
1416        assert!(variant_array.is_null(1));
1417    }
1418
1419    #[test]
1420    fn test_map_row_builder() {
1421        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1422        use arrow::buffer::{NullBuffer, OffsetBuffer};
1423        use arrow::datatypes::{DataType, Field, Fields};
1424        use std::sync::Arc;
1425
1426        // Create the entries struct array (key-value pairs)
1427        let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1428        let values = Int32Array::from(vec![1, 2, 3]);
1429        let entries_fields = Fields::from(vec![
1430            Field::new("key", DataType::Utf8, false),
1431            Field::new("value", DataType::Int32, true),
1432        ]);
1433        let entries = StructArray::new(
1434            entries_fields.clone(),
1435            vec![Arc::new(keys), Arc::new(values)],
1436            None, // No nulls in the entries themselves
1437        );
1438
1439        // Create offsets for 4 maps: [0..1], [1..1], [1..1], [1..3]
1440        // Map 0: {"key1": 1}    (1 entry)
1441        // Map 1: {}             (0 entries - empty)
1442        // Map 2: null           (0 entries but NULL via null buffer)
1443        // Map 3: {"key2": 2, "key3": 3}  (2 entries)
1444        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1445
1446        // Create null buffer - map at index 2 is NULL
1447        let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1448
1449        // Create the map field
1450        let map_field = Arc::new(Field::new(
1451            "entries",
1452            DataType::Struct(entries_fields),
1453            false, // Keys are non-nullable
1454        ));
1455
1456        // Create MapArray using try_new
1457        let map_array = MapArray::try_new(
1458            map_field,
1459            offsets,
1460            entries,
1461            null_buffer,
1462            false, // not ordered
1463        )
1464        .unwrap();
1465
1466        let variant_array = execute_row_builder_test(&map_array);
1467
1468        // Map 0: {"key1": 1}
1469        let map0 = variant_array.value(0);
1470        let obj0 = map0.as_object().unwrap();
1471        assert_eq!(obj0.len(), 1);
1472        assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1473
1474        // Map 1: {} (empty object, not null)
1475        let map1 = variant_array.value(1);
1476        let obj1 = map1.as_object().unwrap();
1477        assert_eq!(obj1.len(), 0); // Empty object
1478
1479        // Map 2: null (actual NULL)
1480        assert!(variant_array.is_null(2));
1481
1482        // Map 3: {"key2": 2, "key3": 3}
1483        let map3 = variant_array.value(3);
1484        let obj3 = map3.as_object().unwrap();
1485        assert_eq!(obj3.len(), 2);
1486        assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1487        assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1488    }
1489
1490    #[test]
1491    fn test_union_sparse_row_builder() {
1492        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1493        use arrow::buffer::ScalarBuffer;
1494        use arrow::datatypes::{DataType, Field, UnionFields};
1495        use std::sync::Arc;
1496
1497        // Create a sparse union array with mixed types (int, float, string)
1498        let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1499        let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1500        let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1501        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1502
1503        let union_fields = UnionFields::from_fields(vec![
1504            Field::new("int_field", DataType::Int32, false),
1505            Field::new("float_field", DataType::Float64, false),
1506            Field::new("string_field", DataType::Utf8, false),
1507        ]);
1508
1509        let children: Vec<Arc<dyn Array>> = vec![
1510            Arc::new(int_array),
1511            Arc::new(float_array),
1512            Arc::new(string_array),
1513        ];
1514
1515        let union_array = UnionArray::try_new(
1516            union_fields,
1517            type_ids,
1518            None, // Sparse union
1519            children,
1520        )
1521        .unwrap();
1522
1523        let variant_array = execute_row_builder_test(&union_array);
1524        assert_eq!(variant_array.value(0), Variant::Int32(1));
1525        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1526        assert_eq!(variant_array.value(2), Variant::from("hello"));
1527        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1528        assert_eq!(variant_array.value(4), Variant::Int32(34));
1529        assert!(variant_array.is_null(5));
1530    }
1531
1532    #[test]
1533    fn test_union_dense_row_builder() {
1534        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1535        use arrow::buffer::ScalarBuffer;
1536        use arrow::datatypes::{DataType, Field, UnionFields};
1537        use std::sync::Arc;
1538
1539        // Create a dense union array with mixed types (int, float, string)
1540        let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1541        let float_array = Float64Array::from(vec![3.2, 32.5]);
1542        let string_array = StringArray::from(vec!["hello"]);
1543        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1544        let offsets = [0, 0, 0, 1, 1, 2]
1545            .into_iter()
1546            .collect::<ScalarBuffer<i32>>();
1547
1548        let union_fields = UnionFields::from_fields(vec![
1549            Field::new("int_field", DataType::Int32, false),
1550            Field::new("float_field", DataType::Float64, false),
1551            Field::new("string_field", DataType::Utf8, false),
1552        ]);
1553
1554        let children: Vec<Arc<dyn Array>> = vec![
1555            Arc::new(int_array),
1556            Arc::new(float_array),
1557            Arc::new(string_array),
1558        ];
1559
1560        let union_array = UnionArray::try_new(
1561            union_fields,
1562            type_ids,
1563            Some(offsets), // Dense union
1564            children,
1565        )
1566        .unwrap();
1567
1568        // Test the row builder
1569        let options = CastOptions {
1570            safe: false,
1571            ..Default::default()
1572        };
1573        let mut row_builder =
1574            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1575                .unwrap();
1576
1577        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1578        for i in 0..union_array.len() {
1579            row_builder.append_row(&mut variant_builder, i).unwrap();
1580        }
1581        let variant_array = variant_builder.build();
1582
1583        assert_eq!(variant_array.len(), 6);
1584        assert_eq!(variant_array.value(0), Variant::Int32(1));
1585        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1586        assert_eq!(variant_array.value(2), Variant::from("hello"));
1587        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1588        assert_eq!(variant_array.value(4), Variant::Int32(34));
1589        assert!(variant_array.is_null(5));
1590    }
1591
1592    #[test]
1593    fn test_union_sparse_type_ids_row_builder() {
1594        use arrow::array::{Int32Array, StringArray, UnionArray};
1595        use arrow::buffer::ScalarBuffer;
1596        use arrow::datatypes::{DataType, Field, UnionFields};
1597        use std::sync::Arc;
1598
1599        // Create a sparse union with non-contiguous type IDs (1, 3)
1600        let int_array = Int32Array::from(vec![Some(42), None]);
1601        let string_array = StringArray::from(vec![None, Some("test")]);
1602        let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1603
1604        let union_fields = UnionFields::try_new(
1605            vec![1, 3], // Non-contiguous type IDs
1606            vec![
1607                Field::new("int_field", DataType::Int32, false),
1608                Field::new("string_field", DataType::Utf8, false),
1609            ],
1610        )
1611        .unwrap();
1612
1613        let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1614
1615        let union_array = UnionArray::try_new(
1616            union_fields,
1617            type_ids,
1618            None, // Sparse union
1619            children,
1620        )
1621        .unwrap();
1622
1623        // Test the row builder
1624        let options = CastOptions {
1625            safe: false,
1626            ..Default::default()
1627        };
1628        let mut row_builder =
1629            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1630                .unwrap();
1631
1632        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1633        for i in 0..union_array.len() {
1634            row_builder.append_row(&mut variant_builder, i).unwrap();
1635        }
1636        let variant_array = variant_builder.build();
1637
1638        // Verify results
1639        assert_eq!(variant_array.len(), 2);
1640
1641        // Row 0: int 42 (type_id = 1)
1642        assert_eq!(variant_array.value(0), Variant::Int32(42));
1643
1644        // Row 1: string "test" (type_id = 3)
1645        assert_eq!(variant_array.value(1), Variant::from("test"));
1646    }
1647
1648    #[test]
1649    fn test_decimal32_row_builder() {
1650        use arrow::array::Decimal32Array;
1651        use parquet_variant::VariantDecimal4;
1652
1653        // Test Decimal32Array with scale 2 (e.g., for currency: 12.34)
1654        let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1655            .with_precision_and_scale(9, 2)
1656            .unwrap();
1657
1658        test_row_builder_basic(
1659            &decimal_array,
1660            vec![
1661                Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1662                None,
1663                Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1664            ],
1665        );
1666    }
1667
1668    #[test]
1669    fn test_decimal128_row_builder() {
1670        use arrow::array::Decimal128Array;
1671        use parquet_variant::VariantDecimal16;
1672
1673        // Test Decimal128Array with negative scale (multiply by 10^|scale|)
1674        let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1675            .with_precision_and_scale(10, -2)
1676            .unwrap();
1677
1678        test_row_builder_basic(
1679            &decimal_array,
1680            vec![
1681                Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1682                None,
1683                Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1684            ],
1685        );
1686    }
1687
1688    #[test]
1689    fn test_decimal256_overflow_row_builder() {
1690        use arrow::array::Decimal256Array;
1691        use arrow::datatypes::i256;
1692
1693        // Test Decimal256Array with a value that overflows i128
1694        let large_value = i256::from_i128(i128::MAX) + i256::from(1); // Overflows i128
1695        let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1696            .with_precision_and_scale(76, 3)
1697            .unwrap();
1698
1699        test_row_builder_basic_with_options(
1700            &decimal_array,
1701            vec![
1702                Some(Variant::Null), // Overflow value becomes Variant::Null
1703                Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1704            ],
1705            CastOptions::default(),
1706        );
1707    }
1708
1709    #[test]
1710    fn test_binary_row_builder() {
1711        use arrow::array::BinaryArray;
1712
1713        let binary_data = vec![
1714            Some(b"hello".as_slice()),
1715            None,
1716            Some(b"\x00\x01\x02\xFF".as_slice()),
1717            Some(b"".as_slice()), // Empty binary
1718        ];
1719        let binary_array = BinaryArray::from(binary_data);
1720
1721        test_row_builder_basic(
1722            &binary_array,
1723            vec![
1724                Some(Variant::from(b"hello".as_slice())),
1725                None,
1726                Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1727                Some(Variant::from([].as_slice())),
1728            ],
1729        );
1730    }
1731
1732    #[test]
1733    fn test_binary_view_row_builder() {
1734        use arrow::array::BinaryViewArray;
1735
1736        let binary_data = vec![
1737            Some(b"short".as_slice()),
1738            None,
1739            Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1740        ];
1741        let binary_view_array = BinaryViewArray::from(binary_data);
1742
1743        test_row_builder_basic(
1744            &binary_view_array,
1745            vec![
1746                Some(Variant::from(b"short".as_slice())),
1747                None,
1748                Some(Variant::from(
1749                    b"this is a longer binary view that exceeds inline storage".as_slice(),
1750                )),
1751            ],
1752        );
1753    }
1754
1755    #[test]
1756    fn test_fixed_size_binary_row_builder() {
1757        use arrow::array::FixedSizeBinaryArray;
1758
1759        let binary_data = vec![
1760            Some([0x01, 0x02, 0x03, 0x04]),
1761            None,
1762            Some([0xFF, 0xFE, 0xFD, 0xFC]),
1763        ];
1764        let fixed_binary_array =
1765            FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1766                .unwrap();
1767
1768        test_row_builder_basic(
1769            &fixed_binary_array,
1770            vec![
1771                Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1772                None,
1773                Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1774            ],
1775        );
1776    }
1777
1778    #[test]
1779    fn test_utf8_view_row_builder() {
1780        use arrow::array::StringViewArray;
1781
1782        let string_data = vec![
1783            Some("short"),
1784            None,
1785            Some("this is a much longer string that will be stored out-of-line in the buffer"),
1786        ];
1787        let string_view_array = StringViewArray::from(string_data);
1788
1789        test_row_builder_basic(
1790            &string_view_array,
1791            vec![
1792                Some(Variant::from("short")),
1793                None,
1794                Some(Variant::from(
1795                    "this is a much longer string that will be stored out-of-line in the buffer",
1796                )),
1797            ],
1798        );
1799    }
1800
1801    #[test]
1802    fn test_timestamp_second_row_builder() {
1803        use arrow::array::TimestampSecondArray;
1804
1805        let timestamp_data = vec![
1806            Some(1609459200), // 2021-01-01 00:00:00 UTC
1807            None,
1808            Some(1640995200), // 2022-01-01 00:00:00 UTC
1809        ];
1810        let timestamp_array = TimestampSecondArray::from(timestamp_data);
1811
1812        let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1813        let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1814
1815        test_row_builder_basic(
1816            &timestamp_array,
1817            vec![
1818                Some(Variant::from(expected_naive1)),
1819                None,
1820                Some(Variant::from(expected_naive2)),
1821            ],
1822        );
1823    }
1824
1825    #[test]
1826    fn test_timestamp_with_timezone_row_builder() {
1827        use arrow::array::TimestampMicrosecondArray;
1828        use chrono::DateTime;
1829
1830        let timestamp_data = vec![
1831            Some(1609459200000000), // 2021-01-01 00:00:00 UTC (in microseconds)
1832            None,
1833            Some(1640995200000000), // 2022-01-01 00:00:00 UTC (in microseconds)
1834        ];
1835        let timezone = "UTC".to_string();
1836        let timestamp_array =
1837            TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1838
1839        let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1840        let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1841
1842        test_row_builder_basic(
1843            &timestamp_array,
1844            vec![
1845                Some(Variant::from(expected_utc1)),
1846                None,
1847                Some(Variant::from(expected_utc2)),
1848            ],
1849        );
1850    }
1851
1852    #[test]
1853    fn test_timestamp_nanosecond_precision_row_builder() {
1854        use arrow::array::TimestampNanosecondArray;
1855
1856        let timestamp_data = vec![
1857            Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
1858            None,
1859            Some(1609459200000000000), // 2021-01-01 00:00:00.000000000 UTC (no fractional seconds)
1860        ];
1861        let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1862
1863        let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1864            .unwrap()
1865            .naive_utc();
1866        let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1867
1868        test_row_builder_basic(
1869            &timestamp_array,
1870            vec![
1871                Some(Variant::from(expected_with_nanos)),
1872                None,
1873                Some(Variant::from(expected_no_nanos)),
1874            ],
1875        );
1876    }
1877
1878    #[test]
1879    fn test_timestamp_millisecond_row_builder() {
1880        use arrow::array::TimestampMillisecondArray;
1881
1882        let timestamp_data = vec![
1883            Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
1884            None,
1885            Some(1609459200000), // 2021-01-01 00:00:00.000 UTC
1886        ];
1887        let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1888
1889        let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1890            .unwrap()
1891            .naive_utc();
1892        let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1893
1894        test_row_builder_basic(
1895            &timestamp_array,
1896            vec![
1897                Some(Variant::from(expected_with_millis)),
1898                None,
1899                Some(Variant::from(expected_no_millis)),
1900            ],
1901        );
1902    }
1903
1904    #[test]
1905    fn test_date32_row_builder() {
1906        use arrow::array::Date32Array;
1907        use chrono::NaiveDate;
1908
1909        let date_data = vec![
1910            Some(0), // 1970-01-01
1911            None,
1912            Some(19723),   // 2024-01-01 (days since epoch)
1913            Some(-719162), // 0001-01-01 (near minimum)
1914        ];
1915        let date_array = Date32Array::from(date_data);
1916
1917        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1918        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1919        let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1920
1921        test_row_builder_basic(
1922            &date_array,
1923            vec![
1924                Some(Variant::from(expected_epoch)),
1925                None,
1926                Some(Variant::from(expected_2024)),
1927                Some(Variant::from(expected_min)),
1928            ],
1929        );
1930    }
1931
1932    #[test]
1933    fn test_date64_row_builder() {
1934        use arrow::array::Date64Array;
1935        use chrono::NaiveDate;
1936
1937        // Test Date64Array with various dates (milliseconds since epoch)
1938        let date_data = vec![
1939            Some(0), // 1970-01-01
1940            None,
1941            Some(1704067200000), // 2024-01-01 (milliseconds since epoch)
1942            Some(86400000),      // 1970-01-02
1943        ];
1944        let date_array = Date64Array::from(date_data);
1945
1946        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1947        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1948        let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1949
1950        test_row_builder_basic(
1951            &date_array,
1952            vec![
1953                Some(Variant::from(expected_epoch)),
1954                None,
1955                Some(Variant::from(expected_2024)),
1956                Some(Variant::from(expected_next_day)),
1957            ],
1958        );
1959    }
1960
1961    #[test]
1962    fn test_time32_second_row_builder() {
1963        use arrow::array::Time32SecondArray;
1964        use chrono::NaiveTime;
1965
1966        // Test Time32SecondArray with various times (seconds since midnight)
1967        let time_data = vec![
1968            Some(0), // 00:00:00
1969            None,
1970            Some(3661),  // 01:01:01
1971            Some(86399), // 23:59:59
1972        ];
1973        let time_array = Time32SecondArray::from(time_data);
1974
1975        let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1976        let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1977        let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1978
1979        test_row_builder_basic(
1980            &time_array,
1981            vec![
1982                Some(Variant::from(expected_midnight)),
1983                None,
1984                Some(Variant::from(expected_time)),
1985                Some(Variant::from(expected_last)),
1986            ],
1987        );
1988    }
1989
1990    #[test]
1991    fn test_time32_millisecond_row_builder() {
1992        use arrow::array::Time32MillisecondArray;
1993        use chrono::NaiveTime;
1994
1995        // Test Time32MillisecondArray with various times (milliseconds since midnight)
1996        let time_data = vec![
1997            Some(0), // 00:00:00.000
1998            None,
1999            Some(3661123),  // 01:01:01.123
2000            Some(86399999), // 23:59:59.999
2001        ];
2002        let time_array = Time32MillisecondArray::from(time_data);
2003
2004        let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
2005        let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
2006        let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
2007
2008        test_row_builder_basic(
2009            &time_array,
2010            vec![
2011                Some(Variant::from(expected_midnight)),
2012                None,
2013                Some(Variant::from(expected_time)),
2014                Some(Variant::from(expected_last)),
2015            ],
2016        );
2017    }
2018
2019    #[test]
2020    fn test_time64_microsecond_row_builder() {
2021        use arrow::array::Time64MicrosecondArray;
2022        use chrono::NaiveTime;
2023
2024        // Test Time64MicrosecondArray with various times (microseconds since midnight)
2025        let time_data = vec![
2026            Some(0), // 00:00:00.000000
2027            None,
2028            Some(3661123456),  // 01:01:01.123456
2029            Some(86399999999), // 23:59:59.999999
2030        ];
2031        let time_array = Time64MicrosecondArray::from(time_data);
2032
2033        let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
2034        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2035        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2036
2037        test_row_builder_basic(
2038            &time_array,
2039            vec![
2040                Some(Variant::from(expected_midnight)),
2041                None,
2042                Some(Variant::from(expected_time)),
2043                Some(Variant::from(expected_last)),
2044            ],
2045        );
2046    }
2047
2048    #[test]
2049    fn test_time64_nanosecond_row_builder() {
2050        use arrow::array::Time64NanosecondArray;
2051        use chrono::NaiveTime;
2052
2053        // Test Time64NanosecondArray with various times (nanoseconds since midnight)
2054        let time_data = vec![
2055            Some(0), // 00:00:00.000000000
2056            None,
2057            Some(3661123456789),  // 01:01:01.123456789
2058            Some(86399999999999), // 23:59:59.999999999
2059        ];
2060        let time_array = Time64NanosecondArray::from(time_data);
2061
2062        let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2063        // Nanoseconds are truncated to microsecond precision in Variant
2064        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2065        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2066
2067        test_row_builder_basic(
2068            &time_array,
2069            vec![
2070                Some(Variant::from(expected_midnight)),
2071                None,
2072                Some(Variant::from(expected_time)),
2073                Some(Variant::from(expected_last)),
2074            ],
2075        );
2076    }
2077}