Skip to main content

parquet_variant_compute/
variant_to_arrow.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::shred_variant::{
19    NullValue, VariantToShreddedVariantRowBuilder,
20    make_variant_to_shredded_variant_arrow_row_builder,
21};
22use crate::type_conversion::{
23    PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options,
24    variant_to_unscaled_decimal,
25};
26use crate::variant_array::ShreddedVariantFieldArray;
27use crate::{VariantArray, VariantValueArrayBuilder};
28use arrow::array::{
29    ArrayRef, ArrowNativeTypeOp, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewBuilder,
30    BooleanBuilder, FixedSizeBinaryBuilder, FixedSizeListArray, GenericListArray,
31    GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
32    OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
33    StructArray,
34};
35use arrow::buffer::{OffsetBuffer, ScalarBuffer};
36use arrow::compute::{CastOptions, DecimalCast, cast_with_options};
37use arrow::datatypes::{self, DataType, DecimalType};
38use arrow::error::{ArrowError, Result};
39use arrow_schema::{FieldRef, Fields, TimeUnit};
40use parquet_variant::{Variant, VariantPath};
41use std::sync::Arc;
42
43/// Builder for converting variant values into strongly typed Arrow arrays.
44///
45/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly
46/// with casting of leaf values to specific types.
47pub(crate) enum VariantToArrowRowBuilder<'a> {
48    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
49    Array(ArrayVariantToArrowRowBuilder<'a>),
50    Struct(StructVariantToArrowRowBuilder<'a>),
51    Encoded(EncodedVariantToArrowRowBuilder<'a>),
52    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
53
54    // Path extraction wrapper - contains a boxed enum for any of the above
55    WithPath(VariantPathRowBuilder<'a>),
56}
57
58impl<'a> VariantToArrowRowBuilder<'a> {
59    pub fn append_null(&mut self) -> Result<()> {
60        use VariantToArrowRowBuilder::*;
61        match self {
62            Primitive(b) => b.append_null(),
63            Array(b) => b.append_null(),
64            Struct(b) => b.append_null(),
65            Encoded(b) => b.append_null(),
66            BinaryVariant(b) => b.append_null(),
67            WithPath(path_builder) => path_builder.append_null(),
68        }
69    }
70
71    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
72        use VariantToArrowRowBuilder::*;
73        match self {
74            Primitive(b) => b.append_value(&value),
75            Array(b) => b.append_value(&value),
76            Struct(b) => b.append_value(&value),
77            Encoded(b) => b.append_value(value),
78            BinaryVariant(b) => b.append_value(value),
79            WithPath(path_builder) => path_builder.append_value(value),
80        }
81    }
82
83    pub fn finish(self) -> Result<ArrayRef> {
84        use VariantToArrowRowBuilder::*;
85        match self {
86            Primitive(b) => b.finish(),
87            Array(b) => b.finish(),
88            Struct(b) => b.finish(),
89            Encoded(b) => b.finish(),
90            BinaryVariant(b) => b.finish(),
91            WithPath(path_builder) => path_builder.finish(),
92        }
93    }
94}
95
96fn make_typed_variant_to_arrow_row_builder<'a>(
97    data_type: &'a DataType,
98    cast_options: &'a CastOptions,
99    capacity: usize,
100) -> Result<VariantToArrowRowBuilder<'a>> {
101    use VariantToArrowRowBuilder::*;
102
103    match data_type {
104        DataType::Struct(fields) => {
105            let builder = StructVariantToArrowRowBuilder::try_new(fields, cast_options, capacity)?;
106            Ok(Struct(builder))
107        }
108        data_type @ (DataType::List(_)
109        | DataType::LargeList(_)
110        | DataType::ListView(_)
111        | DataType::LargeListView(_)
112        | DataType::FixedSizeList(..)) => {
113            let builder =
114                ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity, false)?;
115            Ok(Array(builder))
116        }
117        DataType::Dictionary(_, value_type) => {
118            let builder = EncodedVariantToArrowRowBuilder::try_new(
119                data_type,
120                value_type.as_ref(),
121                cast_options,
122                capacity,
123            )?;
124            Ok(Encoded(builder))
125        }
126        DataType::RunEndEncoded(_, value_field) => {
127            let builder = EncodedVariantToArrowRowBuilder::try_new(
128                data_type,
129                value_field.data_type(),
130                cast_options,
131                capacity,
132            )?;
133            Ok(Encoded(builder))
134        }
135        data_type => {
136            let builder =
137                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
138            Ok(Primitive(builder))
139        }
140    }
141}
142
143pub(crate) fn make_variant_to_arrow_row_builder<'a>(
144    metadata: &ArrayRef,
145    path: VariantPath<'a>,
146    data_type: Option<&'a DataType>,
147    cast_options: &'a CastOptions,
148    capacity: usize,
149) -> Result<VariantToArrowRowBuilder<'a>> {
150    use VariantToArrowRowBuilder::*;
151
152    let mut builder = match data_type {
153        // If no data type was requested, build an unshredded VariantArray.
154        None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
155            metadata.clone(),
156            capacity,
157        )),
158        Some(data_type) => {
159            make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity)?
160        }
161    };
162
163    // Wrap with path extraction if needed
164    if !path.is_empty() {
165        builder = WithPath(VariantPathRowBuilder {
166            builder: Box::new(builder),
167            path,
168        })
169    };
170
171    Ok(builder)
172}
173
174/// Builder for converting primitive variant values to Arrow arrays. It is used by both
175/// `VariantToArrowRowBuilder` (below) and `VariantToShreddedPrimitiveVariantRowBuilder` (in
176/// `shred_variant.rs`).
177pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
178    Null(VariantToNullArrowRowBuilder<'a>),
179    Boolean(VariantToBooleanArrowRowBuilder<'a>),
180    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
181    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
182    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
183    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
184    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
185    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
186    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
187    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
188    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
189    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
190    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
191    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
192    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
193    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
194    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
195    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
196    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
197    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
198    TimestampMilliNtz(
199        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
200    ),
201    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
202    TimestampMicroNtz(
203        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
204    ),
205    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
206    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
207    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
208    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
209    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
210    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
211    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
212    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
213    Uuid(VariantToUuidArrowRowBuilder<'a>),
214    String(VariantToStringArrowBuilder<'a, StringBuilder>),
215    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
216    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
217    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
218    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
219    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
220}
221
222impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
223    pub fn append_null(&mut self) -> Result<()> {
224        use PrimitiveVariantToArrowRowBuilder::*;
225        match self {
226            Null(b) => b.append_null(),
227            Boolean(b) => b.append_null(),
228            Int8(b) => b.append_null(),
229            Int16(b) => b.append_null(),
230            Int32(b) => b.append_null(),
231            Int64(b) => b.append_null(),
232            UInt8(b) => b.append_null(),
233            UInt16(b) => b.append_null(),
234            UInt32(b) => b.append_null(),
235            UInt64(b) => b.append_null(),
236            Float16(b) => b.append_null(),
237            Float32(b) => b.append_null(),
238            Float64(b) => b.append_null(),
239            Decimal32(b) => b.append_null(),
240            Decimal64(b) => b.append_null(),
241            Decimal128(b) => b.append_null(),
242            Decimal256(b) => b.append_null(),
243            TimestampSecond(b) => b.append_null(),
244            TimestampSecondNtz(b) => b.append_null(),
245            TimestampMilli(b) => b.append_null(),
246            TimestampMilliNtz(b) => b.append_null(),
247            TimestampMicro(b) => b.append_null(),
248            TimestampMicroNtz(b) => b.append_null(),
249            TimestampNano(b) => b.append_null(),
250            TimestampNanoNtz(b) => b.append_null(),
251            Time32Second(b) => b.append_null(),
252            Time32Milli(b) => b.append_null(),
253            Time64Micro(b) => b.append_null(),
254            Time64Nano(b) => b.append_null(),
255            Date32(b) => b.append_null(),
256            Date64(b) => b.append_null(),
257            Uuid(b) => b.append_null(),
258            String(b) => b.append_null(),
259            LargeString(b) => b.append_null(),
260            StringView(b) => b.append_null(),
261            Binary(b) => b.append_null(),
262            LargeBinary(b) => b.append_null(),
263            BinaryView(b) => b.append_null(),
264        }
265    }
266
267    pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
268        use PrimitiveVariantToArrowRowBuilder::*;
269        match self {
270            Null(b) => b.append_value(value),
271            Boolean(b) => b.append_value(value),
272            Int8(b) => b.append_value(value),
273            Int16(b) => b.append_value(value),
274            Int32(b) => b.append_value(value),
275            Int64(b) => b.append_value(value),
276            UInt8(b) => b.append_value(value),
277            UInt16(b) => b.append_value(value),
278            UInt32(b) => b.append_value(value),
279            UInt64(b) => b.append_value(value),
280            Float16(b) => b.append_value(value),
281            Float32(b) => b.append_value(value),
282            Float64(b) => b.append_value(value),
283            Decimal32(b) => b.append_value(value),
284            Decimal64(b) => b.append_value(value),
285            Decimal128(b) => b.append_value(value),
286            Decimal256(b) => b.append_value(value),
287            TimestampSecond(b) => b.append_value(value),
288            TimestampSecondNtz(b) => b.append_value(value),
289            TimestampMilli(b) => b.append_value(value),
290            TimestampMilliNtz(b) => b.append_value(value),
291            TimestampMicro(b) => b.append_value(value),
292            TimestampMicroNtz(b) => b.append_value(value),
293            TimestampNano(b) => b.append_value(value),
294            TimestampNanoNtz(b) => b.append_value(value),
295            Time32Second(b) => b.append_value(value),
296            Time32Milli(b) => b.append_value(value),
297            Time64Micro(b) => b.append_value(value),
298            Time64Nano(b) => b.append_value(value),
299            Date32(b) => b.append_value(value),
300            Date64(b) => b.append_value(value),
301            Uuid(b) => b.append_value(value),
302            String(b) => b.append_value(value),
303            LargeString(b) => b.append_value(value),
304            StringView(b) => b.append_value(value),
305            Binary(b) => b.append_value(value),
306            LargeBinary(b) => b.append_value(value),
307            BinaryView(b) => b.append_value(value),
308        }
309    }
310
311    pub fn finish(self) -> Result<ArrayRef> {
312        use PrimitiveVariantToArrowRowBuilder::*;
313        match self {
314            Null(b) => b.finish(),
315            Boolean(b) => b.finish(),
316            Int8(b) => b.finish(),
317            Int16(b) => b.finish(),
318            Int32(b) => b.finish(),
319            Int64(b) => b.finish(),
320            UInt8(b) => b.finish(),
321            UInt16(b) => b.finish(),
322            UInt32(b) => b.finish(),
323            UInt64(b) => b.finish(),
324            Float16(b) => b.finish(),
325            Float32(b) => b.finish(),
326            Float64(b) => b.finish(),
327            Decimal32(b) => b.finish(),
328            Decimal64(b) => b.finish(),
329            Decimal128(b) => b.finish(),
330            Decimal256(b) => b.finish(),
331            TimestampSecond(b) => b.finish(),
332            TimestampSecondNtz(b) => b.finish(),
333            TimestampMilli(b) => b.finish(),
334            TimestampMilliNtz(b) => b.finish(),
335            TimestampMicro(b) => b.finish(),
336            TimestampMicroNtz(b) => b.finish(),
337            TimestampNano(b) => b.finish(),
338            TimestampNanoNtz(b) => b.finish(),
339            Time32Second(b) => b.finish(),
340            Time32Milli(b) => b.finish(),
341            Time64Micro(b) => b.finish(),
342            Time64Nano(b) => b.finish(),
343            Date32(b) => b.finish(),
344            Date64(b) => b.finish(),
345            Uuid(b) => b.finish(),
346            String(b) => b.finish(),
347            LargeString(b) => b.finish(),
348            StringView(b) => b.finish(),
349            Binary(b) => b.finish(),
350            LargeBinary(b) => b.finish(),
351            BinaryView(b) => b.finish(),
352        }
353    }
354}
355
356pub(crate) struct EncodedVariantToArrowRowBuilder<'a> {
357    data_type: &'a DataType,
358    cast_options: &'a CastOptions<'a>,
359    values_builder: Box<VariantToArrowRowBuilder<'a>>,
360}
361
362impl<'a> EncodedVariantToArrowRowBuilder<'a> {
363    fn try_new(
364        data_type: &'a DataType,
365        value_type: &'a DataType,
366        cast_options: &'a CastOptions,
367        capacity: usize,
368    ) -> Result<Self> {
369        let values_builder = Box::new(make_typed_variant_to_arrow_row_builder(
370            value_type,
371            cast_options,
372            capacity,
373        )?);
374        Ok(Self {
375            data_type,
376            cast_options,
377            values_builder,
378        })
379    }
380
381    fn append_null(&mut self) -> Result<()> {
382        self.values_builder.append_null()
383    }
384
385    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
386        self.values_builder.append_value(value)
387    }
388
389    fn finish(self) -> Result<ArrayRef> {
390        let values = self.values_builder.finish()?;
391        cast_with_options(values.as_ref(), self.data_type, self.cast_options)
392    }
393}
394
395/// Creates a row builder that converts primitive `Variant` values into the requested Arrow data type.
396pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
397    data_type: &'a DataType,
398    cast_options: &'a CastOptions,
399    capacity: usize,
400) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
401    use PrimitiveVariantToArrowRowBuilder::*;
402
403    let builder =
404        match data_type {
405            DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
406            DataType::Boolean => {
407                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
408            }
409            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
410                cast_options,
411                capacity,
412            )),
413            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
414                cast_options,
415                capacity,
416            )),
417            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
418                cast_options,
419                capacity,
420            )),
421            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
422                cast_options,
423                capacity,
424            )),
425            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
426                cast_options,
427                capacity,
428            )),
429            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
430                cast_options,
431                capacity,
432            )),
433            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
434                cast_options,
435                capacity,
436            )),
437            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
438                cast_options,
439                capacity,
440            )),
441            DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
442                cast_options,
443                capacity,
444            )),
445            DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
446                cast_options,
447                capacity,
448            )),
449            DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
450                cast_options,
451                capacity,
452            )),
453            DataType::Decimal32(precision, scale) => Decimal32(
454                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
455            ),
456            DataType::Decimal64(precision, scale) => Decimal64(
457                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
458            ),
459            DataType::Decimal128(precision, scale) => Decimal128(
460                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
461            ),
462            DataType::Decimal256(precision, scale) => Decimal256(
463                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
464            ),
465            DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
466                cast_options,
467                capacity,
468            )),
469            DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
470                cast_options,
471                capacity,
472            )),
473            DataType::Time32(TimeUnit::Second) => Time32Second(
474                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
475            ),
476            DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
477                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
478            ),
479            DataType::Time32(t) => {
480                return Err(ArrowError::InvalidArgumentError(format!(
481                    "The unit for Time32 must be second/millisecond, received {t:?}"
482                )));
483            }
484            DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
485                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
486            ),
487            DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
488                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
489            ),
490            DataType::Time64(t) => {
491                return Err(ArrowError::InvalidArgumentError(format!(
492                    "The unit for Time64 must be micro/nano seconds, received {t:?}"
493                )));
494            }
495            DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
496                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
497            ),
498            DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
499                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
500            ),
501            DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
502                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
503            ),
504            DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
505                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
506            ),
507            DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
508                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
509            ),
510            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
511                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
512            ),
513            DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
514                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
515            ),
516            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
517                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
518            ),
519            DataType::Duration(_) | DataType::Interval(_) => {
520                return Err(ArrowError::InvalidArgumentError(
521                    "Casting Variant to duration/interval types is not supported. \
522                    The Variant format does not define duration/interval types."
523                        .to_string(),
524                ));
525            }
526            DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
527            DataType::LargeBinary => {
528                LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
529            }
530            DataType::BinaryView => {
531                BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
532            }
533            DataType::FixedSizeBinary(16) => {
534                Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
535            }
536            DataType::FixedSizeBinary(_) => {
537                return Err(ArrowError::NotYetImplemented(format!(
538                    "DataType {data_type:?} not yet implemented"
539                )));
540            }
541            DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
542            DataType::LargeUtf8 => {
543                LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
544            }
545            DataType::Utf8View => {
546                StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
547            }
548            DataType::List(_)
549            | DataType::LargeList(_)
550            | DataType::ListView(_)
551            | DataType::LargeListView(_)
552            | DataType::FixedSizeList(..)
553            | DataType::Struct(_)
554            | DataType::Map(..)
555            | DataType::Union(..)
556            | DataType::Dictionary(..)
557            | DataType::RunEndEncoded(..) => {
558                return Err(ArrowError::InvalidArgumentError(format!(
559                    "Casting to {data_type:?} is not applicable for primitive Variant types"
560                )));
561            }
562        };
563    Ok(builder)
564}
565
/// Row builder for list-like target types, with one variant per supported Arrow list layout.
///
/// The four offset-based variants share `VariantToListArrowRowBuilder`; its generic arguments are
/// the offset type (`i32` or `i64`) and a flag that is `true` for the view layouts.
pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
    // Built for `DataType::List`.
    List(VariantToListArrowRowBuilder<'a, i32, false>),
    // Built for `DataType::LargeList`.
    LargeList(VariantToListArrowRowBuilder<'a, i64, false>),
    // Built for `DataType::ListView`.
    ListView(VariantToListArrowRowBuilder<'a, i32, true>),
    // Built for `DataType::LargeListView`.
    LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
    // Built for `DataType::FixedSizeList`.
    FixedSizeList(VariantToFixedSizeListArrowRowBuilder<'a>),
}
573
574pub(crate) struct StructVariantToArrowRowBuilder<'a> {
575    fields: &'a Fields,
576    field_builders: Vec<VariantToArrowRowBuilder<'a>>,
577    nulls: NullBufferBuilder,
578    cast_options: &'a CastOptions<'a>,
579}
580
581impl<'a> StructVariantToArrowRowBuilder<'a> {
582    fn try_new(
583        fields: &'a Fields,
584        cast_options: &'a CastOptions<'a>,
585        capacity: usize,
586    ) -> Result<Self> {
587        let mut field_builders = Vec::with_capacity(fields.len());
588        for field in fields.iter() {
589            field_builders.push(make_typed_variant_to_arrow_row_builder(
590                field.data_type(),
591                cast_options,
592                capacity,
593            )?);
594        }
595        Ok(Self {
596            fields,
597            field_builders,
598            nulls: NullBufferBuilder::new(capacity),
599            cast_options,
600        })
601    }
602
603    fn append_null(&mut self) -> Result<()> {
604        for builder in &mut self.field_builders {
605            builder.append_null()?;
606        }
607        self.nulls.append_null();
608        Ok(())
609    }
610
611    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
612        match variant_cast_with_options(value, self.cast_options, Variant::as_object) {
613            Ok(Some(obj)) => {
614                for (index, field) in self.fields.iter().enumerate() {
615                    match obj.get(field.name()) {
616                        Some(field_value) => {
617                            self.field_builders[index].append_value(field_value)?;
618                        }
619                        None => {
620                            self.field_builders[index].append_null()?;
621                        }
622                    }
623                }
624
625                self.nulls.append_non_null();
626                Ok(true)
627            }
628            Ok(None) => {
629                self.append_null()?;
630                Ok(false)
631            }
632            Err(_) => Err(ArrowError::CastError(format!(
633                "Failed to extract struct from variant {value:?}"
634            ))),
635        }
636    }
637
638    fn finish(mut self) -> Result<ArrayRef> {
639        let mut children = Vec::with_capacity(self.field_builders.len());
640        for builder in self.field_builders {
641            children.push(builder.finish()?);
642        }
643        Ok(Arc::new(StructArray::try_new(
644            self.fields.clone(),
645            children,
646            self.nulls.finish(),
647        )?))
648    }
649}
650
651impl<'a> ArrayVariantToArrowRowBuilder<'a> {
652    /// Creates a new list builder for the given data type.
653    ///
654    /// # Arguments
655    /// * `shredded` - If true, element builders produce shredded structs with `value`/`typed_value`
656    ///   fields (for [`crate::shred_variant()`]). If false, element builders produce strongly typed
657    ///   arrays directly (for [`crate::variant_get()`]).
658    pub(crate) fn try_new(
659        data_type: &'a DataType,
660        cast_options: &'a CastOptions,
661        capacity: usize,
662        shredded: bool,
663    ) -> Result<Self> {
664        use ArrayVariantToArrowRowBuilder::*;
665
666        // Make List/ListView builders without repeating the constructor boilerplate.
667        macro_rules! make_list_builder {
668            ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
669                $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
670                    $field.clone(),
671                    $field.data_type(),
672                    cast_options,
673                    capacity,
674                    shredded,
675                )?)
676            };
677        }
678
679        let builder = match data_type {
680            DataType::List(field) => make_list_builder!(List, i32, false, field),
681            DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
682            DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
683            DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
684            DataType::FixedSizeList(field, size) => {
685                FixedSizeList(VariantToFixedSizeListArrowRowBuilder::try_new(
686                    field.clone(),
687                    field.data_type(),
688                    *size,
689                    cast_options,
690                    capacity,
691                    shredded,
692                )?)
693            }
694            other => {
695                return Err(ArrowError::InvalidArgumentError(format!(
696                    "Casting to {other:?} is not applicable for array Variant types"
697                )));
698            }
699        };
700        Ok(builder)
701    }
702
703    pub(crate) fn append_null(&mut self) -> Result<()> {
704        match self {
705            Self::List(builder) => builder.append_null(),
706            Self::LargeList(builder) => builder.append_null(),
707            Self::ListView(builder) => builder.append_null(),
708            Self::LargeListView(builder) => builder.append_null(),
709            Self::FixedSizeList(builder) => builder.append_null(),
710        }
711    }
712
713    pub(crate) fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
714        match self {
715            Self::List(builder) => builder.append_value(value),
716            Self::LargeList(builder) => builder.append_value(value),
717            Self::ListView(builder) => builder.append_value(value),
718            Self::LargeListView(builder) => builder.append_value(value),
719            Self::FixedSizeList(builder) => builder.append_value(value),
720        }
721    }
722
723    pub(crate) fn finish(self) -> Result<ArrayRef> {
724        match self {
725            Self::List(builder) => builder.finish(),
726            Self::LargeList(builder) => builder.finish(),
727            Self::ListView(builder) => builder.finish(),
728            Self::LargeListView(builder) => builder.finish(),
729            Self::FixedSizeList(builder) => builder.finish(),
730        }
731    }
732}
733
734/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the
735/// result to a nested builder.
736pub(crate) struct VariantPathRowBuilder<'a> {
737    builder: Box<VariantToArrowRowBuilder<'a>>,
738    path: VariantPath<'a>,
739}
740
741impl<'a> VariantPathRowBuilder<'a> {
742    fn append_null(&mut self) -> Result<()> {
743        self.builder.append_null()
744    }
745
746    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
747        if let Some(v) = value.get_path(&self.path) {
748            self.builder.append_value(v)
749        } else {
750            self.builder.append_null()?;
751            Ok(false)
752        }
753    }
754
755    fn finish(self) -> Result<ArrayRef> {
756        self.builder.finish()
757    }
758}
759
// Generates a row builder struct that converts variants into one Arrow array by driving an
// underlying array builder.
//
// An invocation supplies, in order:
// * the struct name with its lifetime and an optional bounded generic parameter,
// * a closure-like header naming the capacity parameter (plus at most one extra constructor
//   argument), the builder type, and the expression that creates the builder,
// * a closure-like expression that extracts the value from a variant,
// * the type name interpolated into the cast error message.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$capacity_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $builder_init: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $capacity_param: usize,
                // Optional extra argument, declared here so the builder initializer can see it
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $builder_init,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            // Returns `Ok(true)` when a value was appended, `Ok(false)` when the conversion
            // produced nothing and a null was appended, and a `CastError` when it failed.
            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                let outcome = variant_cast_with_options(
                    $value,
                    self.cast_options,
                    |$value| $value_transform,
                );
                match outcome {
                    Err(_) => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {type_name} from variant {value:?} at path VariantPath([])",
                        type_name = $type_name,
                        value = $value
                    ))),
                    Ok(None) => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    Ok(Some(v)) => {
                        self.builder.append_value(v);
                        Ok(true)
                    }
                }
            }

            // Some generated builders (notably `FakeNullBuilder`) finish by value, which would
            // otherwise trigger an unused-mut warning in the expanded code.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                // A builder yielding `T: Array` resolves this to `<Arc<T> as From<T>>::from`,
                // which then coerces to `ArrayRef`; a builder yielding `ArrayRef` resolves to
                // the identity `<ArrayRef as From<ArrayRef>>::from` from the blanket impl.
                Ok(Arc::from(self.builder.finish()))
            }
        }
    }
}
823
824define_variant_to_primitive_builder!(
825    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
826    |capacity| -> B { B::with_capacity(capacity) },
827    |value| value.as_string(),
828    type_name: B::type_name()
829);
830
831define_variant_to_primitive_builder!(
832    struct VariantToBooleanArrowRowBuilder<'a>
833    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
834    |value| value.as_boolean(),
835    type_name: datatypes::BooleanType::DATA_TYPE
836);
837
838define_variant_to_primitive_builder!(
839    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
840    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
841    |value| T::from_variant(value),
842    type_name: T::DATA_TYPE
843);
844
845define_variant_to_primitive_builder!(
846    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
847    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
848    |value| T::from_variant(value),
849    type_name: T::DATA_TYPE
850);
851
852define_variant_to_primitive_builder!(
853    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
854    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
855        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
856    },
857    |value| T::from_variant(value),
858    type_name: T::DATA_TYPE
859);
860
861define_variant_to_primitive_builder!(
862    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
863    |capacity| -> B { B::with_capacity(capacity) },
864    |value| value.as_u8_slice(),
865    type_name: B::type_name()
866);
867
868/// Builder for converting variant values to arrow Decimal values
869pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
870where
871    T: DecimalType,
872    T::Native: DecimalCast,
873{
874    builder: PrimitiveBuilder<T>,
875    cast_options: &'a CastOptions<'a>,
876    precision: u8,
877    scale: i8,
878}
879
880impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
881where
882    T: DecimalType,
883    T::Native: DecimalCast,
884{
885    fn new(
886        cast_options: &'a CastOptions<'a>,
887        capacity: usize,
888        precision: u8,
889        scale: i8,
890    ) -> Result<Self> {
891        let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
892            .with_precision_and_scale(precision, scale)?;
893        Ok(Self {
894            builder,
895            cast_options,
896            precision,
897            scale,
898        })
899    }
900
901    fn append_null(&mut self) -> Result<()> {
902        self.builder.append_null();
903        Ok(())
904    }
905
906    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
907        match variant_cast_with_options(value, self.cast_options, |value| {
908            variant_to_unscaled_decimal::<T>(value, self.precision, self.scale)
909        }) {
910            Ok(Some(scaled)) => {
911                self.builder.append_value(scaled);
912                Ok(true)
913            }
914            Ok(None) => {
915                self.builder.append_null();
916                Ok(false)
917            }
918            Err(_) => Err(ArrowError::CastError(format!(
919                "Failed to cast to {prefix}(precision={precision}, scale={scale}) from variant {value:?}",
920                prefix = T::PREFIX,
921                precision = self.precision,
922                scale = self.scale
923            ))),
924        }
925    }
926
927    fn finish(mut self) -> Result<ArrayRef> {
928        Ok(Arc::new(self.builder.finish()))
929    }
930}
931
932/// Builder for converting variant values to FixedSizeBinary(16) for UUIDs
933pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
934    builder: FixedSizeBinaryBuilder,
935    cast_options: &'a CastOptions<'a>,
936}
937
938impl<'a> VariantToUuidArrowRowBuilder<'a> {
939    fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
940        Self {
941            builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
942            cast_options,
943        }
944    }
945
946    fn append_null(&mut self) -> Result<()> {
947        self.builder.append_null();
948        Ok(())
949    }
950
951    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
952        match variant_cast_with_options(value, self.cast_options, Variant::as_uuid) {
953            Ok(Some(uuid)) => {
954                self.builder
955                    .append_value(uuid.as_bytes())
956                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
957                Ok(true)
958            }
959            Ok(None) => {
960                self.builder.append_null();
961                Ok(false)
962            }
963            Err(_) => Err(ArrowError::CastError(format!(
964                "Failed to extract UUID from variant {value:?}"
965            ))),
966        }
967    }
968
969    fn finish(mut self) -> Result<ArrayRef> {
970        Ok(Arc::new(self.builder.finish()))
971    }
972}
973
974/// Element builder for list variants, supporting both typed (for [`crate::variant_get()`])
975/// and shredded (for [`crate::shred_variant()`]) output modes.
976enum ListElementBuilder<'a> {
977    /// Produces the target array type directly.
978    Typed(Box<VariantToArrowRowBuilder<'a>>),
979    /// Produces a shredded struct with `value` and `typed_value` fields.
980    Shredded(Box<VariantToShreddedVariantRowBuilder<'a>>),
981}
982
983impl<'a> ListElementBuilder<'a> {
984    fn append_null(&mut self) -> Result<()> {
985        match self {
986            Self::Typed(b) => b.append_null(),
987            Self::Shredded(b) => b.append_null(),
988        }
989    }
990
991    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
992        match self {
993            Self::Typed(b) => b.append_value(value),
994            Self::Shredded(b) => b.append_value(value),
995        }
996    }
997
998    fn finish(self) -> Result<ArrayRef> {
999        match self {
1000            Self::Typed(b) => b.finish(),
1001            Self::Shredded(b) => {
1002                let (value, typed_value, nulls) = b.finish()?;
1003                Ok(ArrayRef::from(ShreddedVariantFieldArray::from_parts(
1004                    Some(Arc::new(value)),
1005                    Some(typed_value),
1006                    nulls,
1007                )))
1008            }
1009        }
1010    }
1011}
1012
1013pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
1014where
1015    O: OffsetSizeTrait + ArrowNativeTypeOp,
1016{
1017    field: FieldRef,
1018    offsets: Vec<O>,
1019    element_builder: ListElementBuilder<'a>,
1020    nulls: NullBufferBuilder,
1021    current_offset: O,
1022    cast_options: &'a CastOptions<'a>,
1023}
1024
1025impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
1026where
1027    O: OffsetSizeTrait + ArrowNativeTypeOp,
1028{
1029    fn try_new(
1030        field: FieldRef,
1031        element_data_type: &'a DataType,
1032        cast_options: &'a CastOptions,
1033        capacity: usize,
1034        shredded: bool,
1035    ) -> Result<Self> {
1036        if capacity >= isize::MAX as usize {
1037            return Err(ArrowError::ComputeError(
1038                "Capacity exceeds isize::MAX when reserving list offsets".to_string(),
1039            ));
1040        }
1041        let mut offsets = Vec::with_capacity(capacity + 1);
1042        offsets.push(O::ZERO);
1043        let element_builder = if shredded {
1044            let builder = make_variant_to_shredded_variant_arrow_row_builder(
1045                element_data_type,
1046                cast_options,
1047                capacity,
1048                NullValue::ArrayElement,
1049            )?;
1050            ListElementBuilder::Shredded(Box::new(builder))
1051        } else {
1052            let builder =
1053                make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?;
1054            ListElementBuilder::Typed(Box::new(builder))
1055        };
1056
1057        Ok(Self {
1058            field,
1059            offsets,
1060            element_builder,
1061            nulls: NullBufferBuilder::new(capacity),
1062            current_offset: O::ZERO,
1063            cast_options,
1064        })
1065    }
1066
1067    fn append_null(&mut self) -> Result<()> {
1068        self.offsets.push(self.current_offset);
1069        self.nulls.append_null();
1070        Ok(())
1071    }
1072
1073    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
1074        match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
1075            Ok(Some(list)) => {
1076                for element in list.iter() {
1077                    self.element_builder.append_value(element)?;
1078                    self.current_offset = self.current_offset.add_checked(O::ONE)?;
1079                }
1080                self.offsets.push(self.current_offset);
1081                self.nulls.append_non_null();
1082                Ok(true)
1083            }
1084            Ok(None) => {
1085                self.append_null()?;
1086                Ok(false)
1087            }
1088            Err(_) => Err(ArrowError::CastError(format!(
1089                "Failed to extract list from variant {value:?}"
1090            ))),
1091        }
1092    }
1093
1094    fn finish(mut self) -> Result<ArrayRef> {
1095        let element_array: ArrayRef = self.element_builder.finish()?;
1096        let field = Arc::new(
1097            self.field
1098                .as_ref()
1099                .clone()
1100                .with_data_type(element_array.data_type().clone()),
1101        );
1102
1103        if IS_VIEW {
1104            // NOTE: `offsets` is never empty (constructor pushes an entry)
1105            let mut sizes = Vec::with_capacity(self.offsets.len() - 1);
1106            for i in 1..self.offsets.len() {
1107                sizes.push(self.offsets[i] - self.offsets[i - 1]);
1108            }
1109            self.offsets.pop();
1110            let list_view_array = GenericListViewArray::<O>::new(
1111                field,
1112                ScalarBuffer::from(self.offsets),
1113                ScalarBuffer::from(sizes),
1114                element_array,
1115                self.nulls.finish(),
1116            );
1117            Ok(Arc::new(list_view_array))
1118        } else {
1119            let list_array = GenericListArray::<O>::new(
1120                field,
1121                OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
1122                element_array,
1123                self.nulls.finish(),
1124            );
1125            Ok(Arc::new(list_array))
1126        }
1127    }
1128}
1129
1130pub(crate) struct VariantToFixedSizeListArrowRowBuilder<'a> {
1131    field: FieldRef,
1132    list_size: i32,
1133    element_builder: ListElementBuilder<'a>,
1134    nulls: NullBufferBuilder,
1135    cast_options: &'a CastOptions<'a>,
1136    shredded: bool,
1137}
1138
1139impl<'a> VariantToFixedSizeListArrowRowBuilder<'a> {
1140    fn try_new(
1141        field: FieldRef,
1142        element_data_type: &'a DataType,
1143        list_size: i32,
1144        cast_options: &'a CastOptions,
1145        capacity: usize,
1146        shredded: bool,
1147    ) -> Result<Self> {
1148        let element_builder = if shredded {
1149            let builder = make_variant_to_shredded_variant_arrow_row_builder(
1150                element_data_type,
1151                cast_options,
1152                capacity,
1153                NullValue::ArrayElement,
1154            )?;
1155            ListElementBuilder::Shredded(Box::new(builder))
1156        } else {
1157            let builder =
1158                make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?;
1159            ListElementBuilder::Typed(Box::new(builder))
1160        };
1161        Ok(Self {
1162            field,
1163            list_size,
1164            element_builder,
1165            nulls: NullBufferBuilder::new(capacity),
1166            cast_options,
1167            shredded,
1168        })
1169    }
1170
1171    fn append_null(&mut self) -> Result<()> {
1172        for _ in 0..self.list_size {
1173            self.element_builder.append_null()?;
1174        }
1175        self.nulls.append_null();
1176        Ok(())
1177    }
1178
1179    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
1180        match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
1181            Ok(Some(list)) => {
1182                let len = list.len();
1183                if len != self.list_size as usize {
1184                    if self.cast_options.safe && !self.shredded {
1185                        self.append_null()?;
1186                        return Ok(false);
1187                    }
1188                    return Err(ArrowError::CastError(format!(
1189                        "Expected fixed size list of size {}, got size {}",
1190                        self.list_size, len
1191                    )));
1192                }
1193                for element in list.iter() {
1194                    self.element_builder.append_value(element)?;
1195                }
1196                self.nulls.append_non_null();
1197                Ok(true)
1198            }
1199            Ok(None) => {
1200                self.append_null()?;
1201                Ok(false)
1202            }
1203            Err(_) => Err(ArrowError::CastError(format!(
1204                "Failed to extract list from variant {value:?}"
1205            ))),
1206        }
1207    }
1208
1209    fn finish(mut self) -> Result<ArrayRef> {
1210        let element_array: ArrayRef = self.element_builder.finish()?;
1211        let field = Arc::new(
1212            self.field
1213                .as_ref()
1214                .clone()
1215                .with_data_type(element_array.data_type().clone()),
1216        );
1217        let fixed_size_list_array =
1218            FixedSizeListArray::try_new(field, self.list_size, element_array, self.nulls.finish())?;
1219        Ok(Arc::new(fixed_size_list_array))
1220    }
1221}
1222
1223/// Builder for creating VariantArray output (for path extraction without type conversion)
1224pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
1225    metadata: ArrayRef,
1226    builder: VariantValueArrayBuilder,
1227    nulls: NullBufferBuilder,
1228}
1229
1230impl VariantToBinaryVariantArrowRowBuilder {
1231    fn new(metadata: ArrayRef, capacity: usize) -> Self {
1232        Self {
1233            metadata,
1234            builder: VariantValueArrayBuilder::new(capacity),
1235            nulls: NullBufferBuilder::new(capacity),
1236        }
1237    }
1238}
1239
1240impl VariantToBinaryVariantArrowRowBuilder {
1241    fn append_null(&mut self) -> Result<()> {
1242        self.builder.append_null();
1243        self.nulls.append_null();
1244        Ok(())
1245    }
1246
1247    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
1248        self.builder.append_value(value);
1249        self.nulls.append_non_null();
1250        Ok(true)
1251    }
1252
1253    fn finish(mut self) -> Result<ArrayRef> {
1254        let variant_array = VariantArray::from_parts(
1255            self.metadata,
1256            Some(Arc::new(self.builder.build()?)),
1257            None, // no typed_value column
1258            self.nulls.finish(),
1259        );
1260
1261        Ok(ArrayRef::from(variant_array))
1262    }
1263}
1264
1265#[derive(Default)]
1266struct FakeNullBuilder {
1267    item_count: usize,
1268}
1269
1270impl FakeNullBuilder {
1271    fn append_value(&mut self, _: ()) {
1272        self.item_count += 1;
1273    }
1274
1275    fn append_null(&mut self) {
1276        self.item_count += 1;
1277    }
1278
1279    fn finish(self) -> NullArray {
1280        NullArray::new(self.item_count)
1281    }
1282}
1283
1284define_variant_to_primitive_builder!(
1285    struct VariantToNullArrowRowBuilder<'a>
1286    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
1287    |value| value.as_null(),
1288    type_name: "Null"
1289);
1290
#[cfg(test)]
mod tests {
    use super::{
        make_primitive_variant_to_arrow_row_builder, make_typed_variant_to_arrow_row_builder,
    };
    use arrow::array::{
        Array, Decimal32Array, FixedSizeBinaryArray, Int32Array, ListArray, StructArray,
    };
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use parquet_variant::{Variant, VariantDecimal4};
    use std::sync::Arc;
    use uuid::Uuid;

    /// Cast options with `safe` disabled; every other option keeps its default.
    fn strict_cast_options() -> CastOptions<'static> {
        CastOptions {
            safe: false,
            ..Default::default()
        }
    }

    /// The primitive builder factory must refuse every nested or otherwise non-primitive type
    /// with an `InvalidArgumentError` that names the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();
        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
        let struct_fields = Fields::from(vec![Field::new("child", DataType::Int32, true)]);
        let map_entry_struct = DataType::Struct(Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]));
        let map_entries_field = Arc::new(Field::new("entries", map_entry_struct, true));
        let union_fields =
            UnionFields::try_new(vec![1], vec![Field::new("child", DataType::Int32, true)])
                .unwrap();
        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false));
        let ree_values_field = Arc::new(Field::new("values", DataType::Utf8, true));

        let non_primitive_types = [
            DataType::List(Arc::clone(&item_field)),
            DataType::LargeList(Arc::clone(&item_field)),
            DataType::ListView(Arc::clone(&item_field)),
            DataType::LargeListView(Arc::clone(&item_field)),
            DataType::FixedSizeList(Arc::clone(&item_field), 2),
            DataType::Struct(struct_fields),
            DataType::Map(map_entries_field, false),
            DataType::Union(union_fields, UnionMode::Dense),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(run_ends_field, ree_values_field),
        ];

        for data_type in &non_primitive_types {
            // The builder type is not `Debug`, so `unwrap_err` is not available here.
            let Err(err) = make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1)
            else {
                panic!("non-primitive type {data_type:?} should be rejected");
            };

            let expected_fragment = format!("{data_type:?}");
            match &err {
                ArrowError::InvalidArgumentError(msg) => {
                    assert!(msg.contains(&expected_fragment));
                }
                other => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }

    /// With strict casting, `Variant::Null` still becomes a null Int32 row, not an error.
    #[test]
    fn strict_cast_allows_variant_null_for_primitive_builder() {
        let cast_options = strict_cast_options();
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2)
                .unwrap();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&Variant::Int32(42)).unwrap();
        assert!(value_appended);

        let finished = builder.finish().unwrap();
        let ints = finished.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(ints.is_null(0));
        assert_eq!(ints.value(1), 42);
    }

    /// With strict casting, `Variant::Null` still becomes a null decimal row, not an error.
    #[test]
    fn strict_cast_allows_variant_null_for_decimal_builder() {
        let cast_options = strict_cast_options();
        let target_type = DataType::Decimal32(9, 2);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&target_type, &cast_options, 2).unwrap();
        let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&decimal_variant).unwrap();
        assert!(value_appended);

        let finished = builder.finish().unwrap();
        let decimals = finished.as_any().downcast_ref::<Decimal32Array>().unwrap();
        assert!(decimals.is_null(0));
        assert_eq!(decimals.value(1), 1234);
    }

    /// With strict casting, `Variant::Null` still becomes a null UUID row, not an error.
    #[test]
    fn strict_cast_allows_variant_null_for_uuid_builder() {
        let cast_options = strict_cast_options();
        let target_type = DataType::FixedSizeBinary(16);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&target_type, &cast_options, 2).unwrap();
        let uuid = Uuid::nil();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&Variant::Uuid(uuid)).unwrap();
        assert!(value_appended);

        let finished = builder.finish().unwrap();
        let uuids = finished
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .unwrap();
        assert!(uuids.is_null(0));
        assert_eq!(uuids.value(1), uuid.as_bytes());
    }

    /// With strict casting, `Variant::Null` becomes a null row for list and struct targets.
    #[test]
    fn strict_cast_allows_variant_null_for_list_and_struct_builders() {
        let cast_options = strict_cast_options();

        // List target
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
        let mut list_builder =
            make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap();
        let list_appended = list_builder.append_value(Variant::Null).unwrap();
        assert!(!list_appended);
        let finished_list = list_builder.finish().unwrap();
        let lists = finished_list.as_any().downcast_ref::<ListArray>().unwrap();
        assert!(lists.is_null(0));

        // Struct target
        let struct_type =
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)]));
        let mut struct_builder =
            make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap();
        let struct_appended = struct_builder.append_value(Variant::Null).unwrap();
        assert!(!struct_appended);
        let finished_struct = struct_builder.finish().unwrap();
        let structs = finished_struct
            .as_any()
            .downcast_ref::<StructArray>()
            .unwrap();
        assert!(structs.is_null(0));
    }
}