Skip to main content

parquet_variant_compute/
arrow_to_variant.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{
19    Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
20    GenericStringArray, ListLikeArray, OffsetSizeTrait, PrimitiveArray,
21};
22use arrow::compute::{CastOptions, kernels::cast};
23use arrow::datatypes::{
24    self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
25    DecimalType, RunEndIndexType,
26};
27use arrow::temporal_conversions::{as_date, as_datetime, as_time};
28use arrow_schema::{ArrowError, DataType, TimeUnit};
29use chrono::{DateTime, TimeZone, Utc};
30use parquet_variant::{
31    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
32    VariantDecimal16, VariantDecimalType,
33};
34use std::collections::HashMap;
35
36// ============================================================================
37// Row-oriented builders for efficient Arrow-to-Variant conversion
38// ============================================================================
39
40/// Row builder for converting Arrow arrays to VariantArray row by row
41pub(crate) enum ArrowToVariantRowBuilder<'a> {
42    Null(NullArrowToVariantBuilder),
43    Boolean(BooleanArrowToVariantBuilder<'a>),
44    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
45    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
46    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
47    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
48    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
49    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
50    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
51    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
52    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
53    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
54    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
55    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
56    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
57    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
58    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
59    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
60    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
61    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
62    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
63    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
64    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
65    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
66    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
67    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
68    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
69    Binary(BinaryArrowToVariantBuilder<'a, i32>),
70    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
71    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
72    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
73    Utf8(StringArrowToVariantBuilder<'a, i32>),
74    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
75    Utf8View(StringViewArrowToVariantBuilder<'a>),
76    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
77    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
78    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
79    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
80    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
81    Struct(StructArrowToVariantBuilder<'a>),
82    Map(MapArrowToVariantBuilder<'a>),
83    Union(UnionArrowToVariantBuilder<'a>),
84    Dictionary(DictionaryArrowToVariantBuilder<'a>),
85    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
86    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
87    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
88}
89
90impl<'a> ArrowToVariantRowBuilder<'a> {
91    /// Appends a single row at the given index to the supplied builder.
92    pub fn append_row(
93        &mut self,
94        builder: &mut impl VariantBuilderExt,
95        index: usize,
96    ) -> Result<(), ArrowError> {
97        use ArrowToVariantRowBuilder::*;
98        match self {
99            Null(b) => b.append_row(builder, index),
100            Boolean(b) => b.append_row(builder, index),
101            PrimitiveInt8(b) => b.append_row(builder, index),
102            PrimitiveInt16(b) => b.append_row(builder, index),
103            PrimitiveInt32(b) => b.append_row(builder, index),
104            PrimitiveInt64(b) => b.append_row(builder, index),
105            PrimitiveUInt8(b) => b.append_row(builder, index),
106            PrimitiveUInt16(b) => b.append_row(builder, index),
107            PrimitiveUInt32(b) => b.append_row(builder, index),
108            PrimitiveUInt64(b) => b.append_row(builder, index),
109            PrimitiveFloat16(b) => b.append_row(builder, index),
110            PrimitiveFloat32(b) => b.append_row(builder, index),
111            PrimitiveFloat64(b) => b.append_row(builder, index),
112            Decimal32(b) => b.append_row(builder, index),
113            Decimal64(b) => b.append_row(builder, index),
114            Decimal128(b) => b.append_row(builder, index),
115            Decimal256(b) => b.append_row(builder, index),
116            TimestampSecond(b) => b.append_row(builder, index),
117            TimestampMillisecond(b) => b.append_row(builder, index),
118            TimestampMicrosecond(b) => b.append_row(builder, index),
119            TimestampNanosecond(b) => b.append_row(builder, index),
120            Date32(b) => b.append_row(builder, index),
121            Date64(b) => b.append_row(builder, index),
122            Time32Second(b) => b.append_row(builder, index),
123            Time32Millisecond(b) => b.append_row(builder, index),
124            Time64Microsecond(b) => b.append_row(builder, index),
125            Time64Nanosecond(b) => b.append_row(builder, index),
126            Binary(b) => b.append_row(builder, index),
127            LargeBinary(b) => b.append_row(builder, index),
128            BinaryView(b) => b.append_row(builder, index),
129            FixedSizeBinary(b) => b.append_row(builder, index),
130            Utf8(b) => b.append_row(builder, index),
131            LargeUtf8(b) => b.append_row(builder, index),
132            Utf8View(b) => b.append_row(builder, index),
133            List(b) => b.append_row(builder, index),
134            LargeList(b) => b.append_row(builder, index),
135            ListView(b) => b.append_row(builder, index),
136            LargeListView(b) => b.append_row(builder, index),
137            FixedSizeList(b) => b.append_row(builder, index),
138            Struct(b) => b.append_row(builder, index),
139            Map(b) => b.append_row(builder, index),
140            Union(b) => b.append_row(builder, index),
141            Dictionary(b) => b.append_row(builder, index),
142            RunEndEncodedInt16(b) => b.append_row(builder, index),
143            RunEndEncodedInt32(b) => b.append_row(builder, index),
144            RunEndEncodedInt64(b) => b.append_row(builder, index),
145        }
146    }
147}
148
149/// Factory function to create the appropriate row builder for a given DataType
150pub(crate) fn make_arrow_to_variant_row_builder<'a>(
151    data_type: &'a DataType,
152    array: &'a dyn Array,
153    options: &'a CastOptions,
154) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
155    use ArrowToVariantRowBuilder::*;
156    let builder =
157        match data_type {
158            DataType::Null => Null(NullArrowToVariantBuilder),
159            DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
160            DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
161            DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
162            DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
163            DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
164            DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
165            DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
166            DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
167            DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
168            DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
169            DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
170            DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
171            DataType::Decimal32(_, scale) => {
172                Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
173            }
174            DataType::Decimal64(_, scale) => {
175                Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
176            }
177            DataType::Decimal128(_, scale) => {
178                Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
179            }
180            DataType::Decimal256(_, scale) => {
181                Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
182            }
183            DataType::Timestamp(time_unit, time_zone) => {
184                match time_unit {
185                    TimeUnit::Second => TimestampSecond(TimestampArrowToVariantBuilder::new(
186                        array,
187                        options,
188                        time_zone.is_some(),
189                    )),
190                    TimeUnit::Millisecond => TimestampMillisecond(
191                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
192                    ),
193                    TimeUnit::Microsecond => TimestampMicrosecond(
194                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
195                    ),
196                    TimeUnit::Nanosecond => TimestampNanosecond(
197                        TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
198                    ),
199                }
200            }
201            DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
202            DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
203            DataType::Time32(time_unit) => match time_unit {
204                TimeUnit::Second => Time32Second(TimeArrowToVariantBuilder::new(array, options)),
205                TimeUnit::Millisecond => {
206                    Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
207                }
208                _ => {
209                    return Err(ArrowError::CastError(format!(
210                        "Unsupported Time32 unit: {time_unit:?}"
211                    )));
212                }
213            },
214            DataType::Time64(time_unit) => match time_unit {
215                TimeUnit::Microsecond => {
216                    Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
217                }
218                TimeUnit::Nanosecond => {
219                    Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
220                }
221                _ => {
222                    return Err(ArrowError::CastError(format!(
223                        "Unsupported Time64 unit: {time_unit:?}"
224                    )));
225                }
226            },
227            DataType::Duration(_) | DataType::Interval(_) => {
228                return Err(ArrowError::InvalidArgumentError(
229                    "Casting duration/interval types to Variant is not supported. \
230                    The Variant format does not define duration/interval types."
231                        .to_string(),
232                ));
233            }
234            DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
235            DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
236            DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
237            DataType::FixedSizeBinary(_) => {
238                FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
239            }
240            DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
241            DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
242            DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
243            DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
244            DataType::LargeList(_) => {
245                LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
246            }
247            DataType::ListView(_) => ListView(ListArrowToVariantBuilder::new(
248                array.as_list_view(),
249                options,
250            )?),
251            DataType::LargeListView(_) => LargeListView(ListArrowToVariantBuilder::new(
252                array.as_list_view(),
253                options,
254            )?),
255            DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
256                array.as_fixed_size_list(),
257                options,
258            )?),
259            DataType::Struct(_) => Struct(StructArrowToVariantBuilder::new(
260                array.as_struct(),
261                options,
262            )?),
263            DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
264            DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
265            DataType::Dictionary(_, _) => {
266                Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
267            }
268            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
269                DataType::Int16 => {
270                    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
271                }
272                DataType::Int32 => {
273                    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
274                }
275                DataType::Int64 => {
276                    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
277                }
278                _ => {
279                    return Err(ArrowError::CastError(format!(
280                        "Unsupported run ends type: {}",
281                        run_ends.data_type()
282                    )));
283                }
284            },
285        };
286    Ok(builder)
287}
288
289/// Macro to define (possibly generic) row builders with consistent structure and behavior.
290///
291/// The macro optionally allows to define a transform for values read from the underlying
292/// array. Transforms of the form `|value| { ... }` are infallible (and should produce something
293/// that implements `Into<Variant>`), while transforms of the form `|value| -> Option<_> { ... }`
294/// are fallible (and should produce `Option<impl Into<Variant>>`); a failed transform will either
295/// append null to the builder or return an error, depending on cast options.
296///
297/// Also supports optional extra fields that are passed to the constructor and which are available
298/// by reference in the value transform. Providing a fallible value transform requires also
299/// providing the extra field `options: &'a CastOptions`.
300// TODO: If/when the macro_metavar_expr feature stabilizes, the `ignore` meta-function would allow
301// us to "use" captured tokens without emitting them:
302//
303// ```
304// $(
305//     ${ignore($value)}
306//     $(
307//         ${ignore($option_ty)}
308//         options: &$lifetime CastOptions,
309//     )?
310// )?
311// ```
312//
313// That, in turn, would allow us to inject the `options` field whenever the user specifies a
314// fallible value transform, instead of requiring them to manually define it. This might not be
315// worth the trouble, though, because it makes for some pretty bulky and unwieldy macro expansions.
macro_rules! define_row_builder {
    (
        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
        $( where $where_path:path: $where_bound:path $(,)? )?
        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
    ) => {
        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
        $( where $where_path: $where_bound )?
        {
            array: &$lifetime $array_type,
            $( $( $field: $field_type, )+ )?
            // Mentions every type parameter so none of them is reported as unused.
            _phantom: std::marker::PhantomData<($( $generic, )*)>,
        }

        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
        $( where $where_path: $where_bound )?
        {
            pub(crate) fn new(
                $array_param: &$lifetime dyn Array
                $( $(, $field: $field_type )+ )?
            ) -> Self {
                Self {
                    array: $init_expr,
                    $( $( $field, )+ )?
                    _phantom: std::marker::PhantomData,
                }
            }

            fn append_row(
                &self,
                builder: &mut impl VariantBuilderExt,
                index: usize,
            ) -> Result<(), ArrowError> {
                // A null slot in the array becomes a null in the builder; nothing is converted.
                if self.array.is_null(index) {
                    builder.append_null();
                    return Ok(());
                }

                // Expose the extra fields to the value transform under their declared names.
                //
                // Callers that supply a fallible transform must declare `options` even though
                // the transform itself rarely reads it, so the first binding is allowed to be
                // unused. The attribute covers only that first `let`, which also hides a
                // genuine warning if an infallible transform ignores its first extra field.
                $(
                    #[allow(unused)]
                    $( let $field = &self.$field; )+
                )?

                let item = self.array.value(index);
                $(
                    // Rebind under the caller's chosen name so the transform can refer to it,
                    // then shadow the macro-local name with the transformed value.
                    let $value = item;
                    let item = $value_transform;
                    $(
                        // `$option_ty` has to appear inside this optional group, otherwise the
                        // group cannot be expanded.
                        let item: Option<$option_ty> = item;
                        let item = match item {
                            Some(converted) => converted,
                            None if self.options.safe => {
                                // Overflow is encoded as Variant::Null,
                                // distinct from None indicating a missing value
                                builder.append_value(Variant::Null);
                                return Ok(());
                            }
                            None => {
                                return Err(ArrowError::ComputeError(format!(
                                    "Failed to convert value at index {index}: conversion failed",
                                )));
                            }
                        };
                    )?
                )?
                builder.append_value(item);
                Ok(())
            }
        }
    };
}
388
389define_row_builder!(
390    struct BooleanArrowToVariantBuilder<'a>,
391    |array| -> arrow::array::BooleanArray { array.as_boolean() }
392);
393
394define_row_builder!(
395    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
396    where T::Native: Into<Variant<'a, 'a>>,
397    |array| -> PrimitiveArray<T> { array.as_primitive() }
398);
399
400define_row_builder!(
401    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
402    where
403        V: VariantDecimalType<Native = A::Native>,
404    {
405        options: &'a CastOptions<'a>,
406        scale: i8,
407    },
408    |array| -> PrimitiveArray<A> { array.as_primitive() },
409    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
410);
411
412// Decimal256 needs a two-stage conversion via i128
413define_row_builder!(
414    struct Decimal256ArrowToVariantBuilder<'a> {
415        options: &'a CastOptions<'a>,
416        scale: i8,
417    },
418    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
419    |value| -> Option<_> {
420        let value = value.to_i128();
421        value.and_then(|v| VariantDecimal16::try_new_with_signed_scale(v, *scale).ok())
422    }
423);
424
425define_row_builder!(
426    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
427        options: &'a CastOptions<'a>,
428        has_time_zone: bool,
429    },
430    |array| -> PrimitiveArray<T> { array.as_primitive() },
431    |value| -> Option<_> {
432        // Convert using Arrow's temporal conversion functions
433        as_datetime::<T>(value).map(|naive_datetime| {
434            if *has_time_zone {
435                // Has timezone -> DateTime<Utc> -> TimestampMicros/TimestampNanos
436                let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&naive_datetime);
437                Variant::from(utc_dt) // Uses From<DateTime<Utc>> for Variant
438            } else {
439                // No timezone -> NaiveDateTime -> TimestampNtzMicros/TimestampNtzNanos
440                Variant::from(naive_datetime) // Uses From<NaiveDateTime> for Variant
441            }
442        })
443    }
444);
445
446define_row_builder!(
447    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
448    where
449        i64: From<T::Native>,
450    {
451        options: &'a CastOptions<'a>,
452    },
453    |array| -> PrimitiveArray<T> { array.as_primitive() },
454    |value| -> Option<_> {
455        let date_value = i64::from(value);
456        as_date::<T>(date_value)
457    }
458);
459
460define_row_builder!(
461    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
462    where
463        i64: From<T::Native>,
464    {
465        options: &'a CastOptions<'a>,
466    },
467    |array| -> PrimitiveArray<T> { array.as_primitive() },
468    |value| -> Option<_> {
469        let time_value = i64::from(value);
470        as_time::<T>(time_value)
471    }
472);
473
474define_row_builder!(
475    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
476    |array| -> GenericBinaryArray<O> { array.as_binary() }
477);
478
479define_row_builder!(
480    struct BinaryViewArrowToVariantBuilder<'a>,
481    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
482);
483
484define_row_builder!(
485    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
486    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
487);
488
489define_row_builder!(
490    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
491    |array| -> GenericStringArray<O> { array.as_string() }
492);
493
494define_row_builder!(
495    struct StringViewArrowToVariantBuilder<'a>,
496    |array| -> arrow::array::StringViewArray { array.as_string_view() }
497);
498
499/// Null builder that always appends null
500pub(crate) struct NullArrowToVariantBuilder;
501
502impl NullArrowToVariantBuilder {
503    fn append_row(
504        &mut self,
505        builder: &mut impl VariantBuilderExt,
506        _index: usize,
507    ) -> Result<(), ArrowError> {
508        builder.append_null();
509        Ok(())
510    }
511}
512
513/// Generic list builder for ListLikeArray types including List, LargeList, ListView, LargeListView,
514/// and FixedSizeList
515pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
516    list_array: &'a L,
517    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
518}
519
520impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
521    pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
522        let values = array.values();
523        let values_builder =
524            make_arrow_to_variant_row_builder(values.data_type(), values, options)?;
525
526        Ok(Self {
527            list_array: array,
528            values_builder: Box::new(values_builder),
529        })
530    }
531
532    fn append_row(
533        &mut self,
534        builder: &mut impl VariantBuilderExt,
535        index: usize,
536    ) -> Result<(), ArrowError> {
537        if self.list_array.is_null(index) {
538            builder.append_null();
539            return Ok(());
540        }
541
542        let range = self.list_array.element_range(index);
543
544        let mut list_builder = builder.try_new_list()?;
545        for value_index in range {
546            self.values_builder
547                .append_row(&mut list_builder, value_index)?;
548        }
549        list_builder.finish();
550        Ok(())
551    }
552}
553
554/// Struct builder for StructArray
555pub(crate) struct StructArrowToVariantBuilder<'a> {
556    struct_array: &'a arrow::array::StructArray,
557    field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
558}
559
560impl<'a> StructArrowToVariantBuilder<'a> {
561    pub(crate) fn new(
562        struct_array: &'a arrow::array::StructArray,
563        options: &'a CastOptions,
564    ) -> Result<Self, ArrowError> {
565        let mut field_builders = Vec::new();
566
567        // Create a row builder for each field
568        for (field_name, field_array) in struct_array
569            .column_names()
570            .iter()
571            .zip(struct_array.columns().iter())
572        {
573            let field_builder = make_arrow_to_variant_row_builder(
574                field_array.data_type(),
575                field_array.as_ref(),
576                options,
577            )?;
578            field_builders.push((*field_name, field_builder));
579        }
580
581        Ok(Self {
582            struct_array,
583            field_builders,
584        })
585    }
586
587    fn append_row(
588        &mut self,
589        builder: &mut impl VariantBuilderExt,
590        index: usize,
591    ) -> Result<(), ArrowError> {
592        if self.struct_array.is_null(index) {
593            builder.append_null();
594        } else {
595            // Create object builder for this struct row
596            let mut obj_builder = builder.try_new_object()?;
597
598            // Process each field
599            for (field_name, row_builder) in &mut self.field_builders {
600                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut obj_builder);
601                row_builder.append_row(&mut field_builder, index)?;
602            }
603
604            obj_builder.finish();
605        }
606        Ok(())
607    }
608}
609
610/// Map builder for MapArray types
611pub(crate) struct MapArrowToVariantBuilder<'a> {
612    map_array: &'a arrow::array::MapArray,
613    key_strings: arrow::array::StringArray,
614    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
615}
616
617impl<'a> MapArrowToVariantBuilder<'a> {
618    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
619        let map_array = array.as_map();
620
621        // Pre-cast keys to strings once
622        let keys = cast(map_array.keys(), &DataType::Utf8)?;
623        let key_strings = keys.as_string::<i32>().clone();
624
625        // Create recursive builder for values
626        let values = map_array.values();
627        let values_builder =
628            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
629
630        Ok(Self {
631            map_array,
632            key_strings,
633            values_builder: Box::new(values_builder),
634        })
635    }
636
637    fn append_row(
638        &mut self,
639        builder: &mut impl VariantBuilderExt,
640        index: usize,
641    ) -> Result<(), ArrowError> {
642        // Check for NULL map first (via null bitmap)
643        if self.map_array.is_null(index) {
644            builder.append_null();
645            return Ok(());
646        }
647
648        let offsets = self.map_array.offsets();
649        let start = offsets[index].as_usize();
650        let end = offsets[index + 1].as_usize();
651
652        // Create object builder for this map
653        let mut object_builder = builder.try_new_object()?;
654
655        // Add each key-value pair (loop does nothing for empty maps - correct!)
656        for kv_index in start..end {
657            let key = self.key_strings.value(kv_index);
658            let mut field_builder = ObjectFieldBuilder::new(key, &mut object_builder);
659            self.values_builder
660                .append_row(&mut field_builder, kv_index)?;
661        }
662
663        object_builder.finish();
664        Ok(())
665    }
666}
667
668/// Union builder for both sparse and dense union arrays
669///
670/// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders.
671pub(crate) struct UnionArrowToVariantBuilder<'a> {
672    union_array: &'a arrow::array::UnionArray,
673    child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
674}
675
676impl<'a> UnionArrowToVariantBuilder<'a> {
677    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
678        let union_array = array.as_union();
679        let type_ids = union_array.type_ids();
680
681        // Create child builders for each union field
682        let mut child_builders = HashMap::new();
683        for &type_id in type_ids {
684            let child_array = union_array.child(type_id);
685            let child_builder = make_arrow_to_variant_row_builder(
686                child_array.data_type(),
687                child_array.as_ref(),
688                options,
689            )?;
690            child_builders.insert(type_id, Box::new(child_builder));
691        }
692
693        Ok(Self {
694            union_array,
695            child_builders,
696        })
697    }
698
699    fn append_row(
700        &mut self,
701        builder: &mut impl VariantBuilderExt,
702        index: usize,
703    ) -> Result<(), ArrowError> {
704        let type_id = self.union_array.type_id(index);
705        let value_offset = self.union_array.value_offset(index);
706
707        // Delegate to the appropriate child builder, or append null to handle an invalid type_id
708        match self.child_builders.get_mut(&type_id) {
709            Some(child_builder) => child_builder.append_row(builder, value_offset)?,
710            None => builder.append_null(),
711        }
712
713        Ok(())
714    }
715}
716
/// Dictionary array builder with simple O(1) indexing
pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
    keys: &'a dyn Array, // only needed for null checks
    // Per-row index into the dictionary values, as returned by `normalized_keys()`;
    // left empty when the dictionary has no values
    normalized_keys: Vec<usize>,
    // Row builder over the dictionary's values array
    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
}
723
724impl<'a> DictionaryArrowToVariantBuilder<'a> {
725    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
726        let dict_array = array.as_any_dictionary();
727        let values = dict_array.values();
728        let values_builder =
729            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
730
731        // WARNING: normalized_keys panics if values is empty
732        let normalized_keys = match values.len() {
733            0 => Vec::new(),
734            _ => dict_array.normalized_keys(),
735        };
736
737        Ok(Self {
738            keys: dict_array.keys(),
739            normalized_keys,
740            values_builder: Box::new(values_builder),
741        })
742    }
743
744    fn append_row(
745        &mut self,
746        builder: &mut impl VariantBuilderExt,
747        index: usize,
748    ) -> Result<(), ArrowError> {
749        if self.keys.is_null(index) {
750            builder.append_null();
751        } else {
752            let normalized_key = self.normalized_keys[index];
753            self.values_builder.append_row(builder, normalized_key)?;
754        }
755        Ok(())
756    }
757}
758
/// Run-end encoded array builder with efficient sequential access
pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
    // The run-end encoded array being converted
    run_array: &'a arrow::array::RunArray<R>,
    // Row builder over the run array's values child
    values_builder: Box<ArrowToVariantRowBuilder<'a>>,

    // Run-end values obtained from `run_array.run_ends().values()`
    run_ends: &'a [R::Native],
    run_number: usize, // Physical index into run_ends and values
    run_start: usize,  // Logical start index of current run
}
768
769impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
770    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
771        let Some(run_array) = array.as_run_opt() else {
772            return Err(ArrowError::CastError("Expected RunArray".to_string()));
773        };
774
775        let values = run_array.values();
776        let values_builder =
777            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
778
779        Ok(Self {
780            run_array,
781            values_builder: Box::new(values_builder),
782            run_ends: run_array.run_ends().values(),
783            run_number: 0,
784            run_start: 0,
785        })
786    }
787
788    fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
789        if index >= self.run_start {
790            let Some(run_end) = self.run_ends.get(self.run_number) else {
791                return Err(ArrowError::CastError(format!(
792                    "Index {index} beyond run array"
793                )));
794            };
795            if index < run_end.as_usize() {
796                return Ok(());
797            }
798            if index == run_end.as_usize() {
799                self.run_number += 1;
800                self.run_start = run_end.as_usize();
801                return Ok(());
802            }
803        }
804
805        // Use partition_point for all non-sequential cases
806        let run_number = self
807            .run_ends
808            .partition_point(|&run_end| run_end.as_usize() <= index);
809        if run_number >= self.run_ends.len() {
810            return Err(ArrowError::CastError(format!(
811                "Index {index} beyond run array"
812            )));
813        }
814        self.run_number = run_number;
815        self.run_start = match run_number {
816            0 => 0,
817            _ => self.run_ends[run_number - 1].as_usize(),
818        };
819        Ok(())
820    }
821
822    fn append_row(
823        &mut self,
824        builder: &mut impl VariantBuilderExt,
825        index: usize,
826    ) -> Result<(), ArrowError> {
827        self.set_run_for_index(index)?;
828
829        // Handle null values
830        if self.run_array.values().is_null(self.run_number) {
831            builder.append_null();
832            return Ok(());
833        }
834
835        // Re-encode the value
836        self.values_builder.append_row(builder, self.run_number)?;
837
838        Ok(())
839    }
840}
841
842#[cfg(test)]
843mod tests {
844    use super::*;
845    use crate::{VariantArray, VariantArrayBuilder};
846    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
847    use arrow::datatypes::Int32Type;
848    use std::sync::Arc;
849
850    /// Builds a VariantArray from an Arrow array using the row builder.
851    fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
852        execute_row_builder_test_with_options(
853            array,
854            CastOptions {
855                safe: false,
856                ..Default::default()
857            },
858        )
859    }
860
861    /// Variant of `execute_row_builder_test` that allows specifying options
862    fn execute_row_builder_test_with_options(
863        array: &dyn Array,
864        options: CastOptions,
865    ) -> VariantArray {
866        let mut row_builder =
867            make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
868
869        let mut array_builder = VariantArrayBuilder::new(array.len());
870
871        // The repetitive loop that appears in every test
872        for i in 0..array.len() {
873            row_builder.append_row(&mut array_builder, i).unwrap();
874        }
875
876        let variant_array = array_builder.build();
877        assert_eq!(variant_array.len(), array.len());
878        variant_array
879    }
880
881    /// Generic helper function to test row builders with basic assertion patterns.
882    /// Uses execute_row_builder_test and adds simple value comparison assertions.
883    fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
884        test_row_builder_basic_with_options(
885            array,
886            expected_values,
887            CastOptions {
888                safe: false,
889                ..Default::default()
890            },
891        );
892    }
893
894    /// Variant of `test_row_builder_basic` that allows specifying options
895    fn test_row_builder_basic_with_options(
896        array: &dyn Array,
897        expected_values: Vec<Option<Variant>>,
898        options: CastOptions,
899    ) {
900        let variant_array = execute_row_builder_test_with_options(array, options);
901
902        // The repetitive assertion pattern
903        for (i, expected) in expected_values.iter().enumerate() {
904            match expected {
905                Some(variant) => {
906                    assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i)
907                }
908                None => assert!(variant_array.is_null(i), "Expected null at index {}", i),
909            }
910        }
911    }
912
913    #[test]
914    fn test_primitive_row_builder() {
915        let int_array = Int32Array::from(vec![Some(42), None, Some(100)]);
916        test_row_builder_basic(
917            &int_array,
918            vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))],
919        );
920    }
921
922    #[test]
923    fn test_string_row_builder() {
924        let string_array = StringArray::from(vec![Some("hello"), None, Some("world")]);
925        test_row_builder_basic(
926            &string_array,
927            vec![
928                Some(Variant::from("hello")),
929                None,
930                Some(Variant::from("world")),
931            ],
932        );
933    }
934
935    #[test]
936    fn test_boolean_row_builder() {
937        let bool_array = BooleanArray::from(vec![Some(true), None, Some(false)]);
938        test_row_builder_basic(
939            &bool_array,
940            vec![Some(Variant::from(true)), None, Some(Variant::from(false))],
941        );
942    }
943
944    #[test]
945    fn test_struct_row_builder() {
946        use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
947        use arrow_schema::{DataType, Field};
948        use std::sync::Arc;
949
950        // Create a struct array with int and string fields
951        let int_field = Field::new("id", DataType::Int32, true);
952        let string_field = Field::new("name", DataType::Utf8, true);
953
954        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
955        let string_array = StringArray::from(vec![Some("Alice"), Some("Bob"), None]);
956
957        let struct_array = StructArray::try_new(
958            vec![int_field, string_field].into(),
959            vec![
960                Arc::new(int_array) as ArrayRef,
961                Arc::new(string_array) as ArrayRef,
962            ],
963            None,
964        )
965        .unwrap();
966
967        let variant_array = execute_row_builder_test(&struct_array);
968
969        // Check first row - should have both fields
970        let first_variant = variant_array.value(0);
971        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
972        assert_eq!(
973            first_variant.get_object_field("name"),
974            Some(Variant::from("Alice"))
975        );
976
977        // Check second row - should have name field but not id (null field omitted)
978        let second_variant = variant_array.value(1);
979        assert_eq!(second_variant.get_object_field("id"), None); // null field omitted
980        assert_eq!(
981            second_variant.get_object_field("name"),
982            Some(Variant::from("Bob"))
983        );
984
985        // Check third row - should have id field but not name (null field omitted)
986        let third_variant = variant_array.value(2);
987        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(3)));
988        assert_eq!(third_variant.get_object_field("name"), None); // null field omitted
989    }
990
991    #[test]
992    fn test_run_end_encoded_row_builder() {
993        use arrow::array::{Int32Array, RunArray};
994        use arrow::datatypes::Int32Type;
995
996        // Create a run-end encoded array: [A, A, B, B, B, C]
997        // run_ends: [2, 5, 6]
998        // values: ["A", "B", "C"]
999        let values = StringArray::from(vec!["A", "B", "C"]);
1000        let run_ends = Int32Array::from(vec![2, 5, 6]);
1001        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1002
1003        let variant_array = execute_row_builder_test(&run_array);
1004
1005        // Verify the values
1006        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1007        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1008        assert_eq!(variant_array.value(2), Variant::from("B")); // Run 1
1009        assert_eq!(variant_array.value(3), Variant::from("B")); // Run 1
1010        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 1
1011        assert_eq!(variant_array.value(5), Variant::from("C")); // Run 2
1012    }
1013
1014    #[test]
1015    fn test_run_end_encoded_random_access() {
1016        use arrow::array::{Int32Array, RunArray};
1017        use arrow::datatypes::Int32Type;
1018
1019        // Create a run-end encoded array: [A, A, B, B, B, C]
1020        let values = StringArray::from(vec!["A", "B", "C"]);
1021        let run_ends = Int32Array::from(vec![2, 5, 6]);
1022        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1023
1024        let options = CastOptions {
1025            safe: false,
1026            ..Default::default()
1027        };
1028        let mut row_builder =
1029            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1030
1031        // Test random access pattern (backward jumps, forward jumps)
1032        let access_pattern = [0, 5, 2, 4, 1, 3]; // Mix of all cases
1033        let expected_values = ["A", "C", "B", "B", "A", "B"];
1034
1035        for (i, &index) in access_pattern.iter().enumerate() {
1036            let mut array_builder = VariantArrayBuilder::new(1);
1037            row_builder.append_row(&mut array_builder, index).unwrap();
1038            let variant_array = array_builder.build();
1039            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1040        }
1041    }
1042
1043    #[test]
1044    fn test_run_end_encoded_with_nulls() {
1045        use arrow::array::{Int32Array, RunArray};
1046        use arrow::datatypes::Int32Type;
1047
1048        // Create a run-end encoded array with null values: [A, A, null, null, B]
1049        let values = StringArray::from(vec![Some("A"), None, Some("B")]);
1050        let run_ends = Int32Array::from(vec![2, 4, 5]);
1051        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1052
1053        let options = CastOptions {
1054            safe: false,
1055            ..Default::default()
1056        };
1057        let mut row_builder =
1058            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
1059        let mut array_builder = VariantArrayBuilder::new(5);
1060
1061        // Test sequential access
1062        for i in 0..5 {
1063            row_builder.append_row(&mut array_builder, i).unwrap();
1064        }
1065
1066        let variant_array = array_builder.build();
1067        assert_eq!(variant_array.len(), 5);
1068
1069        // Verify the values
1070        assert_eq!(variant_array.value(0), Variant::from("A")); // Run 0
1071        assert_eq!(variant_array.value(1), Variant::from("A")); // Run 0
1072        assert!(variant_array.is_null(2)); // Run 1 (null)
1073        assert!(variant_array.is_null(3)); // Run 1 (null)
1074        assert_eq!(variant_array.value(4), Variant::from("B")); // Run 2
1075    }
1076
1077    #[test]
1078    fn test_dictionary_row_builder() {
1079        use arrow::array::{DictionaryArray, Int32Array};
1080        use arrow::datatypes::Int32Type;
1081
1082        // Create a dictionary array: keys=[0, 1, 0, 2, 1], values=["apple", "banana", "cherry"]
1083        let values = StringArray::from(vec!["apple", "banana", "cherry"]);
1084        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1085        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1086
1087        let variant_array = execute_row_builder_test(&dict_array);
1088
1089        // Verify the values match the dictionary lookup
1090        assert_eq!(variant_array.value(0), Variant::from("apple")); // keys[0] = 0 -> values[0] = "apple"
1091        assert_eq!(variant_array.value(1), Variant::from("banana")); // keys[1] = 1 -> values[1] = "banana"
1092        assert_eq!(variant_array.value(2), Variant::from("apple")); // keys[2] = 0 -> values[0] = "apple"
1093        assert_eq!(variant_array.value(3), Variant::from("cherry")); // keys[3] = 2 -> values[2] = "cherry"
1094        assert_eq!(variant_array.value(4), Variant::from("banana")); // keys[4] = 1 -> values[1] = "banana"
1095    }
1096
1097    #[test]
1098    fn test_dictionary_with_nulls() {
1099        use arrow::array::{DictionaryArray, Int32Array};
1100        use arrow::datatypes::Int32Type;
1101
1102        // Create a dictionary array with null keys: keys=[0, null, 1, null, 2], values=["x", "y", "z"]
1103        let values = StringArray::from(vec!["x", "y", "z"]);
1104        let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
1105        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1106
1107        let options = CastOptions {
1108            safe: false,
1109            ..Default::default()
1110        };
1111        let mut row_builder =
1112            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1113                .unwrap();
1114        let mut array_builder = VariantArrayBuilder::new(5);
1115
1116        // Test sequential access
1117        for i in 0..5 {
1118            row_builder.append_row(&mut array_builder, i).unwrap();
1119        }
1120
1121        let variant_array = array_builder.build();
1122        assert_eq!(variant_array.len(), 5);
1123
1124        // Verify the values and nulls
1125        assert_eq!(variant_array.value(0), Variant::from("x")); // keys[0] = 0 -> values[0] = "x"
1126        assert!(variant_array.is_null(1)); // keys[1] = null
1127        assert_eq!(variant_array.value(2), Variant::from("y")); // keys[2] = 1 -> values[1] = "y"
1128        assert!(variant_array.is_null(3)); // keys[3] = null
1129        assert_eq!(variant_array.value(4), Variant::from("z")); // keys[4] = 2 -> values[2] = "z"
1130    }
1131
1132    #[test]
1133    fn test_dictionary_random_access() {
1134        use arrow::array::{DictionaryArray, Int32Array};
1135        use arrow::datatypes::Int32Type;
1136
1137        // Create a dictionary array: keys=[0, 1, 2, 0, 1, 2], values=["red", "green", "blue"]
1138        let values = StringArray::from(vec!["red", "green", "blue"]);
1139        let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
1140        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
1141
1142        let options = CastOptions {
1143            safe: false,
1144            ..Default::default()
1145        };
1146        let mut row_builder =
1147            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1148                .unwrap();
1149
1150        // Test random access pattern
1151        let access_pattern = [5, 0, 3, 1, 4, 2]; // Random order
1152        let expected_values = ["blue", "red", "red", "green", "green", "blue"];
1153
1154        for (i, &index) in access_pattern.iter().enumerate() {
1155            let mut array_builder = VariantArrayBuilder::new(1);
1156            row_builder.append_row(&mut array_builder, index).unwrap();
1157            let variant_array = array_builder.build();
1158            assert_eq!(variant_array.value(0), Variant::from(expected_values[i]));
1159        }
1160    }
1161
1162    #[test]
1163    fn test_nested_dictionary() {
1164        use arrow::array::{DictionaryArray, Int32Array, StructArray};
1165        use arrow::datatypes::{Field, Int32Type};
1166
1167        // Create a dictionary with struct values
1168        let id_array = Int32Array::from(vec![1, 2, 3]);
1169        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
1170        let struct_array = StructArray::from(vec![
1171            (
1172                Arc::new(Field::new("id", DataType::Int32, false)),
1173                Arc::new(id_array) as ArrayRef,
1174            ),
1175            (
1176                Arc::new(Field::new("name", DataType::Utf8, false)),
1177                Arc::new(name_array) as ArrayRef,
1178            ),
1179        ]);
1180
1181        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
1182        let dict_array =
1183            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_array)).unwrap();
1184
1185        let options = CastOptions {
1186            safe: false,
1187            ..Default::default()
1188        };
1189        let mut row_builder =
1190            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
1191                .unwrap();
1192        let mut array_builder = VariantArrayBuilder::new(5);
1193
1194        // Test sequential access
1195        for i in 0..5 {
1196            row_builder.append_row(&mut array_builder, i).unwrap();
1197        }
1198
1199        let variant_array = array_builder.build();
1200        assert_eq!(variant_array.len(), 5);
1201
1202        // Verify the nested struct values
1203        let first_variant = variant_array.value(0);
1204        assert_eq!(first_variant.get_object_field("id"), Some(Variant::from(1)));
1205        assert_eq!(
1206            first_variant.get_object_field("name"),
1207            Some(Variant::from("Alice"))
1208        );
1209
1210        let second_variant = variant_array.value(1);
1211        assert_eq!(
1212            second_variant.get_object_field("id"),
1213            Some(Variant::from(2))
1214        );
1215        assert_eq!(
1216            second_variant.get_object_field("name"),
1217            Some(Variant::from("Bob"))
1218        );
1219
1220        // Test that repeated keys give same values
1221        let third_variant = variant_array.value(2);
1222        assert_eq!(third_variant.get_object_field("id"), Some(Variant::from(1)));
1223        assert_eq!(
1224            third_variant.get_object_field("name"),
1225            Some(Variant::from("Alice"))
1226        );
1227    }
1228
1229    #[test]
1230    fn test_list_row_builder() {
1231        use arrow::array::ListArray;
1232
1233        // Create a list array: [[1, 2], [3, 4, 5], null, []]
1234        let data = vec![
1235            Some(vec![Some(1), Some(2)]),
1236            Some(vec![Some(3), Some(4), Some(5)]),
1237            None,
1238            Some(vec![]),
1239        ];
1240        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1241
1242        let variant_array = execute_row_builder_test(&list_array);
1243
1244        // Row 0: [1, 2]
1245        let row0 = variant_array.value(0);
1246        let list0 = row0.as_list().unwrap();
1247        assert_eq!(list0.len(), 2);
1248        assert_eq!(list0.get(0), Some(Variant::from(1)));
1249        assert_eq!(list0.get(1), Some(Variant::from(2)));
1250
1251        // Row 1: [3, 4, 5]
1252        let row1 = variant_array.value(1);
1253        let list1 = row1.as_list().unwrap();
1254        assert_eq!(list1.len(), 3);
1255        assert_eq!(list1.get(0), Some(Variant::from(3)));
1256        assert_eq!(list1.get(1), Some(Variant::from(4)));
1257        assert_eq!(list1.get(2), Some(Variant::from(5)));
1258
1259        // Row 2: null
1260        assert!(variant_array.is_null(2));
1261
1262        // Row 3: []
1263        let row3 = variant_array.value(3);
1264        let list3 = row3.as_list().unwrap();
1265        assert_eq!(list3.len(), 0);
1266    }
1267
1268    #[test]
1269    fn test_sliced_list_row_builder() {
1270        use arrow::array::ListArray;
1271
1272        // Create a list array: [[1, 2], [3, 4, 5], [6]]
1273        let data = vec![
1274            Some(vec![Some(1), Some(2)]),
1275            Some(vec![Some(3), Some(4), Some(5)]),
1276            Some(vec![Some(6)]),
1277        ];
1278        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1279
1280        // Slice to get just the middle element: [[3, 4, 5]]
1281        let sliced_array = list_array.slice(1, 1);
1282
1283        let options = CastOptions {
1284            safe: false,
1285            ..Default::default()
1286        };
1287        let mut row_builder =
1288            make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
1289                .unwrap();
1290        let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
1291
1292        // Test the single row
1293        row_builder
1294            .append_row(&mut variant_array_builder, 0)
1295            .unwrap();
1296        let variant_array = variant_array_builder.build();
1297
1298        // Verify result
1299        assert_eq!(variant_array.len(), 1);
1300
1301        // Row 0: [3, 4, 5]
1302        let row0 = variant_array.value(0);
1303        let list0 = row0.as_list().unwrap();
1304        assert_eq!(list0.len(), 3);
1305        assert_eq!(list0.get(0), Some(Variant::from(3)));
1306        assert_eq!(list0.get(1), Some(Variant::from(4)));
1307        assert_eq!(list0.get(2), Some(Variant::from(5)));
1308    }
1309
1310    #[test]
1311    fn test_nested_list_row_builder() {
1312        use arrow::array::ListArray;
1313        use arrow::datatypes::Field;
1314
1315        // Build the nested structure manually
1316        let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
1317        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
1318
1319        let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
1320        let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
1321
1322        let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
1323        let outer_list = ListArray::new(
1324            inner_list_field,
1325            outer_offsets,
1326            Arc::new(values_list),
1327            Some(arrow::buffer::NullBuffer::from(vec![true, false])),
1328        );
1329
1330        let options = CastOptions {
1331            safe: false,
1332            ..Default::default()
1333        };
1334        let mut row_builder =
1335            make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
1336                .unwrap();
1337        let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
1338
1339        for i in 0..outer_list.len() {
1340            row_builder
1341                .append_row(&mut variant_array_builder, i)
1342                .unwrap();
1343        }
1344
1345        let variant_array = variant_array_builder.build();
1346
1347        // Verify results
1348        assert_eq!(variant_array.len(), 2);
1349
1350        // Row 0: [[1, 2], [3]]
1351        let row0 = variant_array.value(0);
1352        let outer_list0 = row0.as_list().unwrap();
1353        assert_eq!(outer_list0.len(), 2);
1354
1355        let inner_list0_0 = outer_list0.get(0).unwrap();
1356        let inner_list0_0 = inner_list0_0.as_list().unwrap();
1357        assert_eq!(inner_list0_0.len(), 2);
1358        assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
1359        assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
1360
1361        let inner_list0_1 = outer_list0.get(1).unwrap();
1362        let inner_list0_1 = inner_list0_1.as_list().unwrap();
1363        assert_eq!(inner_list0_1.len(), 1);
1364        assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
1365
1366        // Row 1: null
1367        assert!(variant_array.is_null(1));
1368    }
1369
1370    #[test]
1371    fn test_map_row_builder() {
1372        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
1373        use arrow::buffer::{NullBuffer, OffsetBuffer};
1374        use arrow::datatypes::{DataType, Field, Fields};
1375        use std::sync::Arc;
1376
1377        // Create the entries struct array (key-value pairs)
1378        let keys = StringArray::from(vec!["key1", "key2", "key3"]);
1379        let values = Int32Array::from(vec![1, 2, 3]);
1380        let entries_fields = Fields::from(vec![
1381            Field::new("key", DataType::Utf8, false),
1382            Field::new("value", DataType::Int32, true),
1383        ]);
1384        let entries = StructArray::new(
1385            entries_fields.clone(),
1386            vec![Arc::new(keys), Arc::new(values)],
1387            None, // No nulls in the entries themselves
1388        );
1389
1390        // Create offsets for 4 maps: [0..1], [1..1], [1..1], [1..3]
1391        // Map 0: {"key1": 1}    (1 entry)
1392        // Map 1: {}             (0 entries - empty)
1393        // Map 2: null           (0 entries but NULL via null buffer)
1394        // Map 3: {"key2": 2, "key3": 3}  (2 entries)
1395        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
1396
1397        // Create null buffer - map at index 2 is NULL
1398        let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
1399
1400        // Create the map field
1401        let map_field = Arc::new(Field::new(
1402            "entries",
1403            DataType::Struct(entries_fields),
1404            false, // Keys are non-nullable
1405        ));
1406
1407        // Create MapArray using try_new
1408        let map_array = MapArray::try_new(
1409            map_field,
1410            offsets,
1411            entries,
1412            null_buffer,
1413            false, // not ordered
1414        )
1415        .unwrap();
1416
1417        let variant_array = execute_row_builder_test(&map_array);
1418
1419        // Map 0: {"key1": 1}
1420        let map0 = variant_array.value(0);
1421        let obj0 = map0.as_object().unwrap();
1422        assert_eq!(obj0.len(), 1);
1423        assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
1424
1425        // Map 1: {} (empty object, not null)
1426        let map1 = variant_array.value(1);
1427        let obj1 = map1.as_object().unwrap();
1428        assert_eq!(obj1.len(), 0); // Empty object
1429
1430        // Map 2: null (actual NULL)
1431        assert!(variant_array.is_null(2));
1432
1433        // Map 3: {"key2": 2, "key3": 3}
1434        let map3 = variant_array.value(3);
1435        let obj3 = map3.as_object().unwrap();
1436        assert_eq!(obj3.len(), 2);
1437        assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
1438        assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
1439    }
1440
1441    #[test]
1442    fn test_union_sparse_row_builder() {
1443        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1444        use arrow::buffer::ScalarBuffer;
1445        use arrow::datatypes::{DataType, Field, UnionFields};
1446        use std::sync::Arc;
1447
1448        // Create a sparse union array with mixed types (int, float, string)
1449        let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
1450        let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
1451        let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
1452        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1453
1454        let union_fields = UnionFields::from_fields(vec![
1455            Field::new("int_field", DataType::Int32, false),
1456            Field::new("float_field", DataType::Float64, false),
1457            Field::new("string_field", DataType::Utf8, false),
1458        ]);
1459
1460        let children: Vec<Arc<dyn Array>> = vec![
1461            Arc::new(int_array),
1462            Arc::new(float_array),
1463            Arc::new(string_array),
1464        ];
1465
1466        let union_array = UnionArray::try_new(
1467            union_fields,
1468            type_ids,
1469            None, // Sparse union
1470            children,
1471        )
1472        .unwrap();
1473
1474        let variant_array = execute_row_builder_test(&union_array);
1475        assert_eq!(variant_array.value(0), Variant::Int32(1));
1476        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1477        assert_eq!(variant_array.value(2), Variant::from("hello"));
1478        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1479        assert_eq!(variant_array.value(4), Variant::Int32(34));
1480        assert!(variant_array.is_null(5));
1481    }
1482
1483    #[test]
1484    fn test_union_dense_row_builder() {
1485        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
1486        use arrow::buffer::ScalarBuffer;
1487        use arrow::datatypes::{DataType, Field, UnionFields};
1488        use std::sync::Arc;
1489
1490        // Create a dense union array with mixed types (int, float, string)
1491        let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
1492        let float_array = Float64Array::from(vec![3.2, 32.5]);
1493        let string_array = StringArray::from(vec!["hello"]);
1494        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
1495        let offsets = [0, 0, 0, 1, 1, 2]
1496            .into_iter()
1497            .collect::<ScalarBuffer<i32>>();
1498
1499        let union_fields = UnionFields::from_fields(vec![
1500            Field::new("int_field", DataType::Int32, false),
1501            Field::new("float_field", DataType::Float64, false),
1502            Field::new("string_field", DataType::Utf8, false),
1503        ]);
1504
1505        let children: Vec<Arc<dyn Array>> = vec![
1506            Arc::new(int_array),
1507            Arc::new(float_array),
1508            Arc::new(string_array),
1509        ];
1510
1511        let union_array = UnionArray::try_new(
1512            union_fields,
1513            type_ids,
1514            Some(offsets), // Dense union
1515            children,
1516        )
1517        .unwrap();
1518
1519        // Test the row builder
1520        let options = CastOptions {
1521            safe: false,
1522            ..Default::default()
1523        };
1524        let mut row_builder =
1525            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1526                .unwrap();
1527
1528        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1529        for i in 0..union_array.len() {
1530            row_builder.append_row(&mut variant_builder, i).unwrap();
1531        }
1532        let variant_array = variant_builder.build();
1533
1534        assert_eq!(variant_array.len(), 6);
1535        assert_eq!(variant_array.value(0), Variant::Int32(1));
1536        assert_eq!(variant_array.value(1), Variant::Double(3.2));
1537        assert_eq!(variant_array.value(2), Variant::from("hello"));
1538        assert_eq!(variant_array.value(3), Variant::Double(32.5));
1539        assert_eq!(variant_array.value(4), Variant::Int32(34));
1540        assert!(variant_array.is_null(5));
1541    }
1542
1543    #[test]
1544    fn test_union_sparse_type_ids_row_builder() {
1545        use arrow::array::{Int32Array, StringArray, UnionArray};
1546        use arrow::buffer::ScalarBuffer;
1547        use arrow::datatypes::{DataType, Field, UnionFields};
1548        use std::sync::Arc;
1549
1550        // Create a sparse union with non-contiguous type IDs (1, 3)
1551        let int_array = Int32Array::from(vec![Some(42), None]);
1552        let string_array = StringArray::from(vec![None, Some("test")]);
1553        let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
1554
1555        let union_fields = UnionFields::try_new(
1556            vec![1, 3], // Non-contiguous type IDs
1557            vec![
1558                Field::new("int_field", DataType::Int32, false),
1559                Field::new("string_field", DataType::Utf8, false),
1560            ],
1561        )
1562        .unwrap();
1563
1564        let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
1565
1566        let union_array = UnionArray::try_new(
1567            union_fields,
1568            type_ids,
1569            None, // Sparse union
1570            children,
1571        )
1572        .unwrap();
1573
1574        // Test the row builder
1575        let options = CastOptions {
1576            safe: false,
1577            ..Default::default()
1578        };
1579        let mut row_builder =
1580            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
1581                .unwrap();
1582
1583        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
1584        for i in 0..union_array.len() {
1585            row_builder.append_row(&mut variant_builder, i).unwrap();
1586        }
1587        let variant_array = variant_builder.build();
1588
1589        // Verify results
1590        assert_eq!(variant_array.len(), 2);
1591
1592        // Row 0: int 42 (type_id = 1)
1593        assert_eq!(variant_array.value(0), Variant::Int32(42));
1594
1595        // Row 1: string "test" (type_id = 3)
1596        assert_eq!(variant_array.value(1), Variant::from("test"));
1597    }
1598
1599    #[test]
1600    fn test_decimal32_row_builder() {
1601        use arrow::array::Decimal32Array;
1602        use parquet_variant::VariantDecimal4;
1603
1604        // Test Decimal32Array with scale 2 (e.g., for currency: 12.34)
1605        let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
1606            .with_precision_and_scale(9, 2)
1607            .unwrap();
1608
1609        test_row_builder_basic(
1610            &decimal_array,
1611            vec![
1612                Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())),
1613                None,
1614                Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())),
1615            ],
1616        );
1617    }
1618
1619    #[test]
1620    fn test_decimal128_row_builder() {
1621        use arrow::array::Decimal128Array;
1622        use parquet_variant::VariantDecimal16;
1623
1624        // Test Decimal128Array with negative scale (multiply by 10^|scale|)
1625        let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
1626            .with_precision_and_scale(10, -2)
1627            .unwrap();
1628
1629        test_row_builder_basic(
1630            &decimal_array,
1631            vec![
1632                Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())),
1633                None,
1634                Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())),
1635            ],
1636        );
1637    }
1638
1639    #[test]
1640    fn test_decimal256_overflow_row_builder() {
1641        use arrow::array::Decimal256Array;
1642        use arrow::datatypes::i256;
1643
1644        // Test Decimal256Array with a value that overflows i128
1645        let large_value = i256::from_i128(i128::MAX) + i256::from(1); // Overflows i128
1646        let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
1647            .with_precision_and_scale(76, 3)
1648            .unwrap();
1649
1650        test_row_builder_basic_with_options(
1651            &decimal_array,
1652            vec![
1653                Some(Variant::Null), // Overflow value becomes Variant::Null
1654                Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
1655            ],
1656            CastOptions::default(),
1657        );
1658    }
1659
1660    #[test]
1661    fn test_binary_row_builder() {
1662        use arrow::array::BinaryArray;
1663
1664        let binary_data = vec![
1665            Some(b"hello".as_slice()),
1666            None,
1667            Some(b"\x00\x01\x02\xFF".as_slice()),
1668            Some(b"".as_slice()), // Empty binary
1669        ];
1670        let binary_array = BinaryArray::from(binary_data);
1671
1672        test_row_builder_basic(
1673            &binary_array,
1674            vec![
1675                Some(Variant::from(b"hello".as_slice())),
1676                None,
1677                Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
1678                Some(Variant::from([].as_slice())),
1679            ],
1680        );
1681    }
1682
1683    #[test]
1684    fn test_binary_view_row_builder() {
1685        use arrow::array::BinaryViewArray;
1686
1687        let binary_data = vec![
1688            Some(b"short".as_slice()),
1689            None,
1690            Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
1691        ];
1692        let binary_view_array = BinaryViewArray::from(binary_data);
1693
1694        test_row_builder_basic(
1695            &binary_view_array,
1696            vec![
1697                Some(Variant::from(b"short".as_slice())),
1698                None,
1699                Some(Variant::from(
1700                    b"this is a longer binary view that exceeds inline storage".as_slice(),
1701                )),
1702            ],
1703        );
1704    }
1705
1706    #[test]
1707    fn test_fixed_size_binary_row_builder() {
1708        use arrow::array::FixedSizeBinaryArray;
1709
1710        let binary_data = vec![
1711            Some([0x01, 0x02, 0x03, 0x04]),
1712            None,
1713            Some([0xFF, 0xFE, 0xFD, 0xFC]),
1714        ];
1715        let fixed_binary_array =
1716            FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
1717                .unwrap();
1718
1719        test_row_builder_basic(
1720            &fixed_binary_array,
1721            vec![
1722                Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
1723                None,
1724                Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
1725            ],
1726        );
1727    }
1728
1729    #[test]
1730    fn test_utf8_view_row_builder() {
1731        use arrow::array::StringViewArray;
1732
1733        let string_data = vec![
1734            Some("short"),
1735            None,
1736            Some("this is a much longer string that will be stored out-of-line in the buffer"),
1737        ];
1738        let string_view_array = StringViewArray::from(string_data);
1739
1740        test_row_builder_basic(
1741            &string_view_array,
1742            vec![
1743                Some(Variant::from("short")),
1744                None,
1745                Some(Variant::from(
1746                    "this is a much longer string that will be stored out-of-line in the buffer",
1747                )),
1748            ],
1749        );
1750    }
1751
1752    #[test]
1753    fn test_timestamp_second_row_builder() {
1754        use arrow::array::TimestampSecondArray;
1755
1756        let timestamp_data = vec![
1757            Some(1609459200), // 2021-01-01 00:00:00 UTC
1758            None,
1759            Some(1640995200), // 2022-01-01 00:00:00 UTC
1760        ];
1761        let timestamp_array = TimestampSecondArray::from(timestamp_data);
1762
1763        let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1764        let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
1765
1766        test_row_builder_basic(
1767            &timestamp_array,
1768            vec![
1769                Some(Variant::from(expected_naive1)),
1770                None,
1771                Some(Variant::from(expected_naive2)),
1772            ],
1773        );
1774    }
1775
1776    #[test]
1777    fn test_timestamp_with_timezone_row_builder() {
1778        use arrow::array::TimestampMicrosecondArray;
1779        use chrono::DateTime;
1780
1781        let timestamp_data = vec![
1782            Some(1609459200000000), // 2021-01-01 00:00:00 UTC (in microseconds)
1783            None,
1784            Some(1640995200000000), // 2022-01-01 00:00:00 UTC (in microseconds)
1785        ];
1786        let timezone = "UTC".to_string();
1787        let timestamp_array =
1788            TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
1789
1790        let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap();
1791        let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
1792
1793        test_row_builder_basic(
1794            &timestamp_array,
1795            vec![
1796                Some(Variant::from(expected_utc1)),
1797                None,
1798                Some(Variant::from(expected_utc2)),
1799            ],
1800        );
1801    }
1802
1803    #[test]
1804    fn test_timestamp_nanosecond_precision_row_builder() {
1805        use arrow::array::TimestampNanosecondArray;
1806
1807        let timestamp_data = vec![
1808            Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
1809            None,
1810            Some(1609459200000000000), // 2021-01-01 00:00:00.000000000 UTC (no fractional seconds)
1811        ];
1812        let timestamp_array = TimestampNanosecondArray::from(timestamp_data);
1813
1814        let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
1815            .unwrap()
1816            .naive_utc();
1817        let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1818
1819        test_row_builder_basic(
1820            &timestamp_array,
1821            vec![
1822                Some(Variant::from(expected_with_nanos)),
1823                None,
1824                Some(Variant::from(expected_no_nanos)),
1825            ],
1826        );
1827    }
1828
1829    #[test]
1830    fn test_timestamp_millisecond_row_builder() {
1831        use arrow::array::TimestampMillisecondArray;
1832
1833        let timestamp_data = vec![
1834            Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
1835            None,
1836            Some(1609459200000), // 2021-01-01 00:00:00.000 UTC
1837        ];
1838        let timestamp_array = TimestampMillisecondArray::from(timestamp_data);
1839
1840        let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000)
1841            .unwrap()
1842            .naive_utc();
1843        let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
1844
1845        test_row_builder_basic(
1846            &timestamp_array,
1847            vec![
1848                Some(Variant::from(expected_with_millis)),
1849                None,
1850                Some(Variant::from(expected_no_millis)),
1851            ],
1852        );
1853    }
1854
1855    #[test]
1856    fn test_date32_row_builder() {
1857        use arrow::array::Date32Array;
1858        use chrono::NaiveDate;
1859
1860        let date_data = vec![
1861            Some(0), // 1970-01-01
1862            None,
1863            Some(19723),   // 2024-01-01 (days since epoch)
1864            Some(-719162), // 0001-01-01 (near minimum)
1865        ];
1866        let date_array = Date32Array::from(date_data);
1867
1868        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1869        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1870        let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
1871
1872        test_row_builder_basic(
1873            &date_array,
1874            vec![
1875                Some(Variant::from(expected_epoch)),
1876                None,
1877                Some(Variant::from(expected_2024)),
1878                Some(Variant::from(expected_min)),
1879            ],
1880        );
1881    }
1882
1883    #[test]
1884    fn test_date64_row_builder() {
1885        use arrow::array::Date64Array;
1886        use chrono::NaiveDate;
1887
1888        // Test Date64Array with various dates (milliseconds since epoch)
1889        let date_data = vec![
1890            Some(0), // 1970-01-01
1891            None,
1892            Some(1704067200000), // 2024-01-01 (milliseconds since epoch)
1893            Some(86400000),      // 1970-01-02
1894        ];
1895        let date_array = Date64Array::from(date_data);
1896
1897        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
1898        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
1899        let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
1900
1901        test_row_builder_basic(
1902            &date_array,
1903            vec![
1904                Some(Variant::from(expected_epoch)),
1905                None,
1906                Some(Variant::from(expected_2024)),
1907                Some(Variant::from(expected_next_day)),
1908            ],
1909        );
1910    }
1911
1912    #[test]
1913    fn test_time32_second_row_builder() {
1914        use arrow::array::Time32SecondArray;
1915        use chrono::NaiveTime;
1916
1917        // Test Time32SecondArray with various times (seconds since midnight)
1918        let time_data = vec![
1919            Some(0), // 00:00:00
1920            None,
1921            Some(3661),  // 01:01:01
1922            Some(86399), // 23:59:59
1923        ];
1924        let time_array = Time32SecondArray::from(time_data);
1925
1926        let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
1927        let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
1928        let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
1929
1930        test_row_builder_basic(
1931            &time_array,
1932            vec![
1933                Some(Variant::from(expected_midnight)),
1934                None,
1935                Some(Variant::from(expected_time)),
1936                Some(Variant::from(expected_last)),
1937            ],
1938        );
1939    }
1940
1941    #[test]
1942    fn test_time32_millisecond_row_builder() {
1943        use arrow::array::Time32MillisecondArray;
1944        use chrono::NaiveTime;
1945
1946        // Test Time32MillisecondArray with various times (milliseconds since midnight)
1947        let time_data = vec![
1948            Some(0), // 00:00:00.000
1949            None,
1950            Some(3661123),  // 01:01:01.123
1951            Some(86399999), // 23:59:59.999
1952        ];
1953        let time_array = Time32MillisecondArray::from(time_data);
1954
1955        let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
1956        let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
1957        let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
1958
1959        test_row_builder_basic(
1960            &time_array,
1961            vec![
1962                Some(Variant::from(expected_midnight)),
1963                None,
1964                Some(Variant::from(expected_time)),
1965                Some(Variant::from(expected_last)),
1966            ],
1967        );
1968    }
1969
1970    #[test]
1971    fn test_time64_microsecond_row_builder() {
1972        use arrow::array::Time64MicrosecondArray;
1973        use chrono::NaiveTime;
1974
1975        // Test Time64MicrosecondArray with various times (microseconds since midnight)
1976        let time_data = vec![
1977            Some(0), // 00:00:00.000000
1978            None,
1979            Some(3661123456),  // 01:01:01.123456
1980            Some(86399999999), // 23:59:59.999999
1981        ];
1982        let time_array = Time64MicrosecondArray::from(time_data);
1983
1984        let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
1985        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
1986        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
1987
1988        test_row_builder_basic(
1989            &time_array,
1990            vec![
1991                Some(Variant::from(expected_midnight)),
1992                None,
1993                Some(Variant::from(expected_time)),
1994                Some(Variant::from(expected_last)),
1995            ],
1996        );
1997    }
1998
1999    #[test]
2000    fn test_time64_nanosecond_row_builder() {
2001        use arrow::array::Time64NanosecondArray;
2002        use chrono::NaiveTime;
2003
2004        // Test Time64NanosecondArray with various times (nanoseconds since midnight)
2005        let time_data = vec![
2006            Some(0), // 00:00:00.000000000
2007            None,
2008            Some(3661123456789),  // 01:01:01.123456789
2009            Some(86399999999999), // 23:59:59.999999999
2010        ];
2011        let time_array = Time64NanosecondArray::from(time_data);
2012
2013        let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
2014        // Nanoseconds are truncated to microsecond precision in Variant
2015        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
2016        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
2017
2018        test_row_builder_basic(
2019            &time_array,
2020            vec![
2021                Some(Variant::from(expected_midnight)),
2022                None,
2023                Some(Variant::from(expected_time)),
2024                Some(Variant::from(expected_last)),
2025            ],
2026        );
2027    }
2028}