parquet_variant_compute/
variant_to_arrow.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{
19    ArrayRef, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray, BinaryViewBuilder,
20    BooleanBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder, LargeStringBuilder, NullArray,
21    NullBufferBuilder, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
22};
23use arrow::compute::{CastOptions, DecimalCast};
24use arrow::datatypes::{self, DataType, DecimalType};
25use arrow::error::{ArrowError, Result};
26use parquet_variant::{Variant, VariantPath};
27
28use crate::type_conversion::{
29    PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
30};
31use crate::{VariantArray, VariantValueArrayBuilder};
32
33use arrow_schema::TimeUnit;
34use std::sync::Arc;
35
/// Builder for converting primitive variant values to Arrow arrays. It is used by both
/// `VariantToArrowRowBuilder` (below) and `VariantToShreddedPrimitiveVariantRowBuilder` (in
/// `shred_variant.rs`).
///
/// There is one variant per supported Arrow output type; each wraps the concrete row builder
/// that produces that type. Instances are created by
/// `make_primitive_variant_to_arrow_row_builder`, which picks the variant from the requested
/// `DataType`. All variants expose the same `append_null` / `append_value` / `finish` trio, and
/// the methods on this enum simply forward to the wrapped builder.
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
    // Null and boolean
    Null(VariantToNullArrowRowBuilder<'a>),
    Boolean(VariantToBooleanArrowRowBuilder<'a>),
    // Signed and unsigned integers
    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
    // Floating point
    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
    // Decimals (the precision and scale are stored inside the wrapped builder)
    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
    // Timestamps: the `...Ntz` variants are selected when the requested Arrow timestamp type has
    // no time zone (`None`); the others are selected when a time zone is present.
    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMilliNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
    ),
    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampMicroNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
    ),
    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    // Time of day and dates
    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
    // UUIDs, selected for `DataType::FixedSizeBinary(16)`
    Uuid(VariantToUuidArrowRowBuilder<'a>),
    // Strings, one variant per Arrow string layout
    String(VariantToStringArrowBuilder<'a, StringBuilder>),
    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
    // Binary, one variant per Arrow binary layout
    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
}
83
/// Builder for converting variant values into strongly typed Arrow arrays.
///
/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly
/// with casting of leaf values to specific types.
///
/// Instances are created by `make_variant_to_arrow_row_builder`.
pub(crate) enum VariantToArrowRowBuilder<'a> {
    /// Converts each value to a primitive Arrow type; used when a data type was requested.
    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
    /// Keeps each value as a variant and produces a `VariantArray`; used when no data type was
    /// requested.
    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

    /// Path extraction wrapper: holds a boxed builder (either of the variants above) and feeds
    /// it the value found at a path. Only used when the requested path is non-empty.
    WithPath(VariantPathRowBuilder<'a>),
}
95
96impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
97    pub fn append_null(&mut self) -> Result<()> {
98        use PrimitiveVariantToArrowRowBuilder::*;
99        match self {
100            Null(b) => b.append_null(),
101            Boolean(b) => b.append_null(),
102            Int8(b) => b.append_null(),
103            Int16(b) => b.append_null(),
104            Int32(b) => b.append_null(),
105            Int64(b) => b.append_null(),
106            UInt8(b) => b.append_null(),
107            UInt16(b) => b.append_null(),
108            UInt32(b) => b.append_null(),
109            UInt64(b) => b.append_null(),
110            Float16(b) => b.append_null(),
111            Float32(b) => b.append_null(),
112            Float64(b) => b.append_null(),
113            Decimal32(b) => b.append_null(),
114            Decimal64(b) => b.append_null(),
115            Decimal128(b) => b.append_null(),
116            Decimal256(b) => b.append_null(),
117            TimestampSecond(b) => b.append_null(),
118            TimestampSecondNtz(b) => b.append_null(),
119            TimestampMilli(b) => b.append_null(),
120            TimestampMilliNtz(b) => b.append_null(),
121            TimestampMicro(b) => b.append_null(),
122            TimestampMicroNtz(b) => b.append_null(),
123            TimestampNano(b) => b.append_null(),
124            TimestampNanoNtz(b) => b.append_null(),
125            Time32Second(b) => b.append_null(),
126            Time32Milli(b) => b.append_null(),
127            Time64Micro(b) => b.append_null(),
128            Time64Nano(b) => b.append_null(),
129            Date32(b) => b.append_null(),
130            Date64(b) => b.append_null(),
131            Uuid(b) => b.append_null(),
132            String(b) => b.append_null(),
133            LargeString(b) => b.append_null(),
134            StringView(b) => b.append_null(),
135            Binary(b) => b.append_null(),
136            LargeBinary(b) => b.append_null(),
137            BinaryView(b) => b.append_null(),
138        }
139    }
140
141    pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
142        use PrimitiveVariantToArrowRowBuilder::*;
143        match self {
144            Null(b) => b.append_value(value),
145            Boolean(b) => b.append_value(value),
146            Int8(b) => b.append_value(value),
147            Int16(b) => b.append_value(value),
148            Int32(b) => b.append_value(value),
149            Int64(b) => b.append_value(value),
150            UInt8(b) => b.append_value(value),
151            UInt16(b) => b.append_value(value),
152            UInt32(b) => b.append_value(value),
153            UInt64(b) => b.append_value(value),
154            Float16(b) => b.append_value(value),
155            Float32(b) => b.append_value(value),
156            Float64(b) => b.append_value(value),
157            Decimal32(b) => b.append_value(value),
158            Decimal64(b) => b.append_value(value),
159            Decimal128(b) => b.append_value(value),
160            Decimal256(b) => b.append_value(value),
161            TimestampSecond(b) => b.append_value(value),
162            TimestampSecondNtz(b) => b.append_value(value),
163            TimestampMilli(b) => b.append_value(value),
164            TimestampMilliNtz(b) => b.append_value(value),
165            TimestampMicro(b) => b.append_value(value),
166            TimestampMicroNtz(b) => b.append_value(value),
167            TimestampNano(b) => b.append_value(value),
168            TimestampNanoNtz(b) => b.append_value(value),
169            Time32Second(b) => b.append_value(value),
170            Time32Milli(b) => b.append_value(value),
171            Time64Micro(b) => b.append_value(value),
172            Time64Nano(b) => b.append_value(value),
173            Date32(b) => b.append_value(value),
174            Date64(b) => b.append_value(value),
175            Uuid(b) => b.append_value(value),
176            String(b) => b.append_value(value),
177            LargeString(b) => b.append_value(value),
178            StringView(b) => b.append_value(value),
179            Binary(b) => b.append_value(value),
180            LargeBinary(b) => b.append_value(value),
181            BinaryView(b) => b.append_value(value),
182        }
183    }
184
185    pub fn finish(self) -> Result<ArrayRef> {
186        use PrimitiveVariantToArrowRowBuilder::*;
187        match self {
188            Null(b) => b.finish(),
189            Boolean(b) => b.finish(),
190            Int8(b) => b.finish(),
191            Int16(b) => b.finish(),
192            Int32(b) => b.finish(),
193            Int64(b) => b.finish(),
194            UInt8(b) => b.finish(),
195            UInt16(b) => b.finish(),
196            UInt32(b) => b.finish(),
197            UInt64(b) => b.finish(),
198            Float16(b) => b.finish(),
199            Float32(b) => b.finish(),
200            Float64(b) => b.finish(),
201            Decimal32(b) => b.finish(),
202            Decimal64(b) => b.finish(),
203            Decimal128(b) => b.finish(),
204            Decimal256(b) => b.finish(),
205            TimestampSecond(b) => b.finish(),
206            TimestampSecondNtz(b) => b.finish(),
207            TimestampMilli(b) => b.finish(),
208            TimestampMilliNtz(b) => b.finish(),
209            TimestampMicro(b) => b.finish(),
210            TimestampMicroNtz(b) => b.finish(),
211            TimestampNano(b) => b.finish(),
212            TimestampNanoNtz(b) => b.finish(),
213            Time32Second(b) => b.finish(),
214            Time32Milli(b) => b.finish(),
215            Time64Micro(b) => b.finish(),
216            Time64Nano(b) => b.finish(),
217            Date32(b) => b.finish(),
218            Date64(b) => b.finish(),
219            Uuid(b) => b.finish(),
220            String(b) => b.finish(),
221            LargeString(b) => b.finish(),
222            StringView(b) => b.finish(),
223            Binary(b) => b.finish(),
224            LargeBinary(b) => b.finish(),
225            BinaryView(b) => b.finish(),
226        }
227    }
228}
229
230impl<'a> VariantToArrowRowBuilder<'a> {
231    pub fn append_null(&mut self) -> Result<()> {
232        use VariantToArrowRowBuilder::*;
233        match self {
234            Primitive(b) => b.append_null(),
235            BinaryVariant(b) => b.append_null(),
236            WithPath(path_builder) => path_builder.append_null(),
237        }
238    }
239
240    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
241        use VariantToArrowRowBuilder::*;
242        match self {
243            Primitive(b) => b.append_value(&value),
244            BinaryVariant(b) => b.append_value(value),
245            WithPath(path_builder) => path_builder.append_value(value),
246        }
247    }
248
249    pub fn finish(self) -> Result<ArrayRef> {
250        use VariantToArrowRowBuilder::*;
251        match self {
252            Primitive(b) => b.finish(),
253            BinaryVariant(b) => b.finish(),
254            WithPath(path_builder) => path_builder.finish(),
255        }
256    }
257}
258
259/// Creates a row builder that converts primitive `Variant` values into the requested Arrow data type.
260pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
261    data_type: &'a DataType,
262    cast_options: &'a CastOptions,
263    capacity: usize,
264) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
265    use PrimitiveVariantToArrowRowBuilder::*;
266
267    let builder =
268        match data_type {
269            DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
270            DataType::Boolean => {
271                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
272            }
273            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
274                cast_options,
275                capacity,
276            )),
277            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
278                cast_options,
279                capacity,
280            )),
281            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
282                cast_options,
283                capacity,
284            )),
285            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
286                cast_options,
287                capacity,
288            )),
289            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
290                cast_options,
291                capacity,
292            )),
293            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
294                cast_options,
295                capacity,
296            )),
297            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
298                cast_options,
299                capacity,
300            )),
301            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
302                cast_options,
303                capacity,
304            )),
305            DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
306                cast_options,
307                capacity,
308            )),
309            DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
310                cast_options,
311                capacity,
312            )),
313            DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
314                cast_options,
315                capacity,
316            )),
317            DataType::Decimal32(precision, scale) => Decimal32(
318                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
319            ),
320            DataType::Decimal64(precision, scale) => Decimal64(
321                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
322            ),
323            DataType::Decimal128(precision, scale) => Decimal128(
324                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
325            ),
326            DataType::Decimal256(precision, scale) => Decimal256(
327                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
328            ),
329            DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
330                cast_options,
331                capacity,
332            )),
333            DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
334                cast_options,
335                capacity,
336            )),
337            DataType::Time32(TimeUnit::Second) => Time32Second(
338                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
339            ),
340            DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
341                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
342            ),
343            DataType::Time32(t) => {
344                return Err(ArrowError::InvalidArgumentError(format!(
345                    "The unit for Time32 must be second/millisecond, received {t:?}"
346                )));
347            }
348            DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
349                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
350            ),
351            DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
352                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
353            ),
354            DataType::Time64(t) => {
355                return Err(ArrowError::InvalidArgumentError(format!(
356                    "The unit for Time64 must be micro/nano seconds, received {t:?}"
357                )));
358            }
359            DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
360                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
361            ),
362            DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
363                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
364            ),
365            DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
366                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
367            ),
368            DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
369                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
370            ),
371            DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
372                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
373            ),
374            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
375                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
376            ),
377            DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
378                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
379            ),
380            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
381                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
382            ),
383            DataType::Duration(_) | DataType::Interval(_) => {
384                return Err(ArrowError::InvalidArgumentError(
385                    "Casting Variant to duration/interval types is not supported. \
386                    The Variant format does not define duration/interval types."
387                        .to_string(),
388                ));
389            }
390            DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
391            DataType::LargeBinary => {
392                LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
393            }
394            DataType::BinaryView => {
395                BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
396            }
397            DataType::FixedSizeBinary(16) => {
398                Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
399            }
400            DataType::FixedSizeBinary(_) => {
401                return Err(ArrowError::NotYetImplemented(format!(
402                    "DataType {data_type:?} not yet implemented"
403                )));
404            }
405            DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
406            DataType::LargeUtf8 => {
407                LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
408            }
409            DataType::Utf8View => {
410                StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
411            }
412            DataType::List(_)
413            | DataType::LargeList(_)
414            | DataType::ListView(_)
415            | DataType::LargeListView(_)
416            | DataType::FixedSizeList(..)
417            | DataType::Struct(_)
418            | DataType::Map(..)
419            | DataType::Union(..)
420            | DataType::Dictionary(..)
421            | DataType::RunEndEncoded(..) => {
422                return Err(ArrowError::InvalidArgumentError(format!(
423                    "Casting to {data_type:?} is not applicable for primitive Variant types"
424                )));
425            }
426        };
427    Ok(builder)
428}
429
430pub(crate) fn make_variant_to_arrow_row_builder<'a>(
431    metadata: &BinaryViewArray,
432    path: VariantPath<'a>,
433    data_type: Option<&'a DataType>,
434    cast_options: &'a CastOptions,
435    capacity: usize,
436) -> Result<VariantToArrowRowBuilder<'a>> {
437    use VariantToArrowRowBuilder::*;
438
439    let mut builder = match data_type {
440        // If no data type was requested, build an unshredded VariantArray.
441        None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
442            metadata.clone(),
443            capacity,
444        )),
445        Some(DataType::Struct(_)) => {
446            return Err(ArrowError::NotYetImplemented(
447                "Converting unshredded variant objects to arrow structs".to_string(),
448            ));
449        }
450        Some(
451            DataType::List(_)
452            | DataType::LargeList(_)
453            | DataType::ListView(_)
454            | DataType::LargeListView(_)
455            | DataType::FixedSizeList(..),
456        ) => {
457            return Err(ArrowError::NotYetImplemented(
458                "Converting unshredded variant arrays to arrow lists".to_string(),
459            ));
460        }
461        Some(data_type) => {
462            let builder =
463                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
464            Primitive(builder)
465        }
466    };
467
468    // Wrap with path extraction if needed
469    if !path.is_empty() {
470        builder = WithPath(VariantPathRowBuilder {
471            builder: Box::new(builder),
472            path,
473        })
474    };
475
476    Ok(builder)
477}
478
479/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the
480/// result to a nested builder.
481pub(crate) struct VariantPathRowBuilder<'a> {
482    builder: Box<VariantToArrowRowBuilder<'a>>,
483    path: VariantPath<'a>,
484}
485
486impl<'a> VariantPathRowBuilder<'a> {
487    fn append_null(&mut self) -> Result<()> {
488        self.builder.append_null()
489    }
490
491    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
492        if let Some(v) = value.get_path(&self.path) {
493            self.builder.append_value(v)
494        } else {
495            self.builder.append_null()?;
496            Ok(false)
497        }
498    }
499
500    fn finish(self) -> Result<ArrayRef> {
501        self.builder.finish()
502    }
503}
504
// Generates a `pub(crate)` row builder struct together with its `new`, `append_null`,
// `append_value` and `finish` methods.
//
// Invocation shape:
//
//     define_variant_to_primitive_builder!(
//         struct Name<'a [, G: Bound]>
//         |capacity [, extra: ExtraType]| -> Builder[<T>] { <expression creating the builder> },
//         |value| <expression yielding `Option<_>` from `value`>,
//         type_name: <expression used in the cast error message>
//     );
//
// * The first closure-like part names the `usize` parameter of `new` (plus at most one extra
//   constructor parameter) and gives the expression that creates the wrapped Arrow builder.
// * The second part names the `&Variant` parameter of `append_value` and gives the conversion
//   expression. `Some(v)` is appended as a value; `None` is treated as a failed conversion.
// * The wrapped builder must provide `append_value`, `append_null` and `finish`.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic: $bound )?> {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                // Optional extra constructor parameter, made available to `$init_expr`
                $( $field: $field_type, )?
            ) -> Self {
                let builder = $init_expr;
                Self {
                    builder,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match $value_transform {
                    Some(converted) => {
                        self.builder.append_value(converted);
                        Ok(true)
                    }
                    // Safe casting: append null on conversion failure
                    None if self.cast_options.safe => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    // Unsafe casting: return error on conversion failure
                    None => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
                        $type_name,
                        $value
                    ))),
                }
            }

            // `mut` is unnecessary for builders whose `finish` takes `self` by value
            // (currently `FakeNullBuilder`), hence the `allow` on the generated method.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                let array = self.builder.finish();
                Ok(Arc::new(array))
            }
        }
    }
}
562
// String row builder, generic over the Arrow string builder `B`. A value is appended when
// `Variant::as_string` returns `Some`.
define_variant_to_primitive_builder!(
    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_string(),
    type_name: B::type_name()
);

// Boolean row builder. A value is appended when `Variant::as_boolean` returns `Some`.
define_variant_to_primitive_builder!(
    struct VariantToBooleanArrowRowBuilder<'a>
    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
    |value|  value.as_boolean(),
    type_name: datatypes::BooleanType::DATA_TYPE
);

// Row builder for any Arrow primitive type `T` that implements `PrimitiveFromVariant`; the
// conversion is delegated to `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);

// Timestamp row builder bound to `TimestampFromVariant<true>`. It is the one selected for
// Arrow timestamp types without a time zone.
define_variant_to_primitive_builder!(
    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);

// Timestamp row builder bound to `TimestampFromVariant<false>`. Its constructor takes the
// extra `tz` argument, which is applied to the Arrow builder via `with_timezone_opt`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
    },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);

// Binary row builder, generic over the Arrow binary builder `B`. A value is appended when
// `Variant::as_u8_slice` returns `Some`.
define_variant_to_primitive_builder!(
    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_u8_slice(),
    type_name: B::type_name()
);
606
607/// Builder for converting variant values to arrow Decimal values
608pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
609where
610    T: DecimalType,
611    T::Native: DecimalCast,
612{
613    builder: PrimitiveBuilder<T>,
614    cast_options: &'a CastOptions<'a>,
615    precision: u8,
616    scale: i8,
617}
618
619impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
620where
621    T: DecimalType,
622    T::Native: DecimalCast,
623{
624    fn new(
625        cast_options: &'a CastOptions<'a>,
626        capacity: usize,
627        precision: u8,
628        scale: i8,
629    ) -> Result<Self> {
630        let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
631            .with_precision_and_scale(precision, scale)?;
632        Ok(Self {
633            builder,
634            cast_options,
635            precision,
636            scale,
637        })
638    }
639
640    fn append_null(&mut self) -> Result<()> {
641        self.builder.append_null();
642        Ok(())
643    }
644
645    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
646        if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
647            self.builder.append_value(scaled);
648            Ok(true)
649        } else if self.cast_options.safe {
650            self.builder.append_null();
651            Ok(false)
652        } else {
653            Err(ArrowError::CastError(format!(
654                "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
655                T::PREFIX,
656                self.precision,
657                self.scale,
658                value
659            )))
660        }
661    }
662
663    fn finish(mut self) -> Result<ArrayRef> {
664        Ok(Arc::new(self.builder.finish()))
665    }
666}
667
668/// Builder for converting variant values to FixedSizeBinary(16) for UUIDs
669pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
670    builder: FixedSizeBinaryBuilder,
671    cast_options: &'a CastOptions<'a>,
672}
673
674impl<'a> VariantToUuidArrowRowBuilder<'a> {
675    fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
676        Self {
677            builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
678            cast_options,
679        }
680    }
681
682    fn append_null(&mut self) -> Result<()> {
683        self.builder.append_null();
684        Ok(())
685    }
686
687    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
688        match value.as_uuid() {
689            Some(uuid) => {
690                self.builder
691                    .append_value(uuid.as_bytes())
692                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
693
694                Ok(true)
695            }
696            None if self.cast_options.safe => {
697                self.builder.append_null();
698                Ok(false)
699            }
700            None => Err(ArrowError::CastError(format!(
701                "Failed to extract UUID from variant {value:?}",
702            ))),
703        }
704    }
705
706    fn finish(mut self) -> Result<ArrayRef> {
707        Ok(Arc::new(self.builder.finish()))
708    }
709}
710
711/// Builder for creating VariantArray output (for path extraction without type conversion)
712pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
713    metadata: BinaryViewArray,
714    builder: VariantValueArrayBuilder,
715    nulls: NullBufferBuilder,
716}
717
718impl VariantToBinaryVariantArrowRowBuilder {
719    fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
720        Self {
721            metadata,
722            builder: VariantValueArrayBuilder::new(capacity),
723            nulls: NullBufferBuilder::new(capacity),
724        }
725    }
726}
727
728impl VariantToBinaryVariantArrowRowBuilder {
729    fn append_null(&mut self) -> Result<()> {
730        self.builder.append_null();
731        self.nulls.append_null();
732        Ok(())
733    }
734
735    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
736        self.builder.append_value(value);
737        self.nulls.append_non_null();
738        Ok(true)
739    }
740
741    fn finish(mut self) -> Result<ArrayRef> {
742        let variant_array = VariantArray::from_parts(
743            self.metadata,
744            Some(self.builder.build()?),
745            None, // no typed_value column
746            self.nulls.finish(),
747        );
748
749        Ok(ArrayRef::from(variant_array))
750    }
751}
752
753#[derive(Default)]
754struct FakeNullBuilder {
755    item_count: usize,
756}
757
758impl FakeNullBuilder {
759    fn append_value(&mut self, _: ()) {
760        self.item_count += 1;
761    }
762
763    fn append_null(&mut self) {
764        self.item_count += 1;
765    }
766
767    fn finish(self) -> NullArray {
768        NullArray::new(self.item_count)
769    }
770}
771
// Null row builder backed by `FakeNullBuilder`. The capacity argument is ignored
// (`_capacity`), and a row counts as a value when `Variant::as_null` returns `Some`.
define_variant_to_primitive_builder!(
    struct VariantToNullArrowRowBuilder<'a>
    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
    |value| value.as_null(),
    type_name: "Null"
);
778
#[cfg(test)]
mod tests {
    use super::make_primitive_variant_to_arrow_row_builder;
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use std::sync::Arc;

    /// Nested and encoded Arrow types must be refused with an `InvalidArgumentError` whose
    /// message contains the debug rendering of the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        // Shared child field for all the list flavours.
        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
        let map_entries = DataType::Struct(Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]));

        let rejected_types = [
            DataType::List(Arc::clone(&item_field)),
            DataType::LargeList(Arc::clone(&item_field)),
            DataType::ListView(Arc::clone(&item_field)),
            DataType::LargeListView(Arc::clone(&item_field)),
            DataType::FixedSizeList(item_field, 2),
            DataType::Struct(Fields::from(vec![Field::new(
                "child",
                DataType::Int32,
                true,
            )])),
            DataType::Map(Arc::new(Field::new("entries", map_entries, true)), false),
            DataType::Union(
                UnionFields::new(vec![1], vec![Field::new("child", DataType::Int32, true)]),
                UnionMode::Dense,
            ),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::Int32, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            ),
        ];

        for data_type in &rejected_types {
            // The builder type has no `Debug` impl, so `expect_err` is not available here.
            let outcome = make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1);
            let Err(err) = outcome else {
                panic!("non-primitive type {data_type:?} should be rejected");
            };

            match err {
                ArrowError::InvalidArgumentError(msg) => {
                    let rendered_type = format!("{data_type:?}");
                    assert!(msg.contains(&rendered_type));
                }
                other => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }
}
833}