parquet_variant_compute/
variant_to_arrow.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{
19    ArrayRef, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray, BinaryViewBuilder,
20    BooleanBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder, LargeStringBuilder, NullArray,
21    NullBufferBuilder, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
22};
23use arrow::compute::{CastOptions, DecimalCast};
24use arrow::datatypes::{self, DataType, DecimalType};
25use arrow::error::{ArrowError, Result};
26use parquet_variant::{Variant, VariantPath};
27
28use crate::type_conversion::{
29    PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
30};
31use crate::{VariantArray, VariantValueArrayBuilder};
32
33use arrow_schema::TimeUnit;
34use std::sync::Arc;
35
36/// Builder for converting primitive variant values to Arrow arrays. It is used by both
37/// `VariantToArrowRowBuilder` (below) and `VariantToShreddedPrimitiveVariantRowBuilder` (in
38/// `shred_variant.rs`).
39pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
40    Null(VariantToNullArrowRowBuilder<'a>),
41    Boolean(VariantToBooleanArrowRowBuilder<'a>),
42    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
43    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
44    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
45    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
46    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
47    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
48    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
49    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
50    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
51    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
52    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
53    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
54    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
55    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
56    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
57    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
58    TimestampMicroNtz(
59        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
60    ),
61    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
62    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
63    Time(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
64    Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
65    Uuid(VariantToUuidArrowRowBuilder<'a>),
66    String(VariantToStringArrowBuilder<'a, StringBuilder>),
67    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
68    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
69    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
70    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
71    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
72}
73
74/// Builder for converting variant values into strongly typed Arrow arrays.
75///
76/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly
77/// with casting of leaf values to specific types.
78pub(crate) enum VariantToArrowRowBuilder<'a> {
79    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
80    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
81
82    // Path extraction wrapper - contains a boxed enum for any of the above
83    WithPath(VariantPathRowBuilder<'a>),
84}
85
86impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
87    pub fn append_null(&mut self) -> Result<()> {
88        use PrimitiveVariantToArrowRowBuilder::*;
89        match self {
90            Null(b) => b.append_null(),
91            Boolean(b) => b.append_null(),
92            Int8(b) => b.append_null(),
93            Int16(b) => b.append_null(),
94            Int32(b) => b.append_null(),
95            Int64(b) => b.append_null(),
96            UInt8(b) => b.append_null(),
97            UInt16(b) => b.append_null(),
98            UInt32(b) => b.append_null(),
99            UInt64(b) => b.append_null(),
100            Float16(b) => b.append_null(),
101            Float32(b) => b.append_null(),
102            Float64(b) => b.append_null(),
103            Decimal32(b) => b.append_null(),
104            Decimal64(b) => b.append_null(),
105            Decimal128(b) => b.append_null(),
106            Decimal256(b) => b.append_null(),
107            TimestampMicro(b) => b.append_null(),
108            TimestampMicroNtz(b) => b.append_null(),
109            TimestampNano(b) => b.append_null(),
110            TimestampNanoNtz(b) => b.append_null(),
111            Time(b) => b.append_null(),
112            Date(b) => b.append_null(),
113            Uuid(b) => b.append_null(),
114            String(b) => b.append_null(),
115            LargeString(b) => b.append_null(),
116            StringView(b) => b.append_null(),
117            Binary(b) => b.append_null(),
118            LargeBinary(b) => b.append_null(),
119            BinaryView(b) => b.append_null(),
120        }
121    }
122
123    pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
124        use PrimitiveVariantToArrowRowBuilder::*;
125        match self {
126            Null(b) => b.append_value(value),
127            Boolean(b) => b.append_value(value),
128            Int8(b) => b.append_value(value),
129            Int16(b) => b.append_value(value),
130            Int32(b) => b.append_value(value),
131            Int64(b) => b.append_value(value),
132            UInt8(b) => b.append_value(value),
133            UInt16(b) => b.append_value(value),
134            UInt32(b) => b.append_value(value),
135            UInt64(b) => b.append_value(value),
136            Float16(b) => b.append_value(value),
137            Float32(b) => b.append_value(value),
138            Float64(b) => b.append_value(value),
139            Decimal32(b) => b.append_value(value),
140            Decimal64(b) => b.append_value(value),
141            Decimal128(b) => b.append_value(value),
142            Decimal256(b) => b.append_value(value),
143            TimestampMicro(b) => b.append_value(value),
144            TimestampMicroNtz(b) => b.append_value(value),
145            TimestampNano(b) => b.append_value(value),
146            TimestampNanoNtz(b) => b.append_value(value),
147            Time(b) => b.append_value(value),
148            Date(b) => b.append_value(value),
149            Uuid(b) => b.append_value(value),
150            String(b) => b.append_value(value),
151            LargeString(b) => b.append_value(value),
152            StringView(b) => b.append_value(value),
153            Binary(b) => b.append_value(value),
154            LargeBinary(b) => b.append_value(value),
155            BinaryView(b) => b.append_value(value),
156        }
157    }
158
159    pub fn finish(self) -> Result<ArrayRef> {
160        use PrimitiveVariantToArrowRowBuilder::*;
161        match self {
162            Null(b) => b.finish(),
163            Boolean(b) => b.finish(),
164            Int8(b) => b.finish(),
165            Int16(b) => b.finish(),
166            Int32(b) => b.finish(),
167            Int64(b) => b.finish(),
168            UInt8(b) => b.finish(),
169            UInt16(b) => b.finish(),
170            UInt32(b) => b.finish(),
171            UInt64(b) => b.finish(),
172            Float16(b) => b.finish(),
173            Float32(b) => b.finish(),
174            Float64(b) => b.finish(),
175            Decimal32(b) => b.finish(),
176            Decimal64(b) => b.finish(),
177            Decimal128(b) => b.finish(),
178            Decimal256(b) => b.finish(),
179            TimestampMicro(b) => b.finish(),
180            TimestampMicroNtz(b) => b.finish(),
181            TimestampNano(b) => b.finish(),
182            TimestampNanoNtz(b) => b.finish(),
183            Time(b) => b.finish(),
184            Date(b) => b.finish(),
185            Uuid(b) => b.finish(),
186            String(b) => b.finish(),
187            LargeString(b) => b.finish(),
188            StringView(b) => b.finish(),
189            Binary(b) => b.finish(),
190            LargeBinary(b) => b.finish(),
191            BinaryView(b) => b.finish(),
192        }
193    }
194}
195
196impl<'a> VariantToArrowRowBuilder<'a> {
197    pub fn append_null(&mut self) -> Result<()> {
198        use VariantToArrowRowBuilder::*;
199        match self {
200            Primitive(b) => b.append_null(),
201            BinaryVariant(b) => b.append_null(),
202            WithPath(path_builder) => path_builder.append_null(),
203        }
204    }
205
206    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
207        use VariantToArrowRowBuilder::*;
208        match self {
209            Primitive(b) => b.append_value(&value),
210            BinaryVariant(b) => b.append_value(value),
211            WithPath(path_builder) => path_builder.append_value(value),
212        }
213    }
214
215    pub fn finish(self) -> Result<ArrayRef> {
216        use VariantToArrowRowBuilder::*;
217        match self {
218            Primitive(b) => b.finish(),
219            BinaryVariant(b) => b.finish(),
220            WithPath(path_builder) => path_builder.finish(),
221        }
222    }
223}
224
225/// Creates a row builder that converts primitive `Variant` values into the requested Arrow data type.
226pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
227    data_type: &'a DataType,
228    cast_options: &'a CastOptions,
229    capacity: usize,
230) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
231    use PrimitiveVariantToArrowRowBuilder::*;
232
233    let builder =
234        match data_type {
235            DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
236            DataType::Boolean => {
237                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
238            }
239            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
240                cast_options,
241                capacity,
242            )),
243            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
244                cast_options,
245                capacity,
246            )),
247            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
248                cast_options,
249                capacity,
250            )),
251            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
252                cast_options,
253                capacity,
254            )),
255            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
256                cast_options,
257                capacity,
258            )),
259            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
260                cast_options,
261                capacity,
262            )),
263            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
264                cast_options,
265                capacity,
266            )),
267            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
268                cast_options,
269                capacity,
270            )),
271            DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
272                cast_options,
273                capacity,
274            )),
275            DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
276                cast_options,
277                capacity,
278            )),
279            DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
280                cast_options,
281                capacity,
282            )),
283            DataType::Decimal32(precision, scale) => Decimal32(
284                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
285            ),
286            DataType::Decimal64(precision, scale) => Decimal64(
287                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
288            ),
289            DataType::Decimal128(precision, scale) => Decimal128(
290                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
291            ),
292            DataType::Decimal256(precision, scale) => Decimal256(
293                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
294            ),
295            DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
296                cast_options,
297                capacity,
298            )),
299            DataType::Date64 | DataType::Time32(_) => {
300                return Err(ArrowError::NotYetImplemented(format!(
301                    "DataType {data_type:?} not yet implemented"
302                )));
303            }
304            DataType::Time64(TimeUnit::Microsecond) => Time(
305                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
306            ),
307            DataType::Time64(_) => {
308                return Err(ArrowError::NotYetImplemented(format!(
309                    "DataType {data_type:?} not yet implemented"
310                )));
311            }
312            DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
313                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
314            ),
315            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
316                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
317            ),
318            DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
319                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
320            ),
321            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
322                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
323            ),
324            DataType::Timestamp(..) => {
325                return Err(ArrowError::NotYetImplemented(format!(
326                    "DataType {data_type:?} not yet implemented"
327                )));
328            }
329            DataType::Duration(_) | DataType::Interval(_) => {
330                return Err(ArrowError::InvalidArgumentError(
331                    "Casting Variant to duration/interval types is not supported. \
332                    The Variant format does not define duration/interval types."
333                        .to_string(),
334                ));
335            }
336            DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
337            DataType::LargeBinary => {
338                LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
339            }
340            DataType::BinaryView => {
341                BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
342            }
343            DataType::FixedSizeBinary(16) => {
344                Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
345            }
346            DataType::FixedSizeBinary(_) => {
347                return Err(ArrowError::NotYetImplemented(format!(
348                    "DataType {data_type:?} not yet implemented"
349                )));
350            }
351            DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
352            DataType::LargeUtf8 => {
353                LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
354            }
355            DataType::Utf8View => {
356                StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
357            }
358            DataType::List(_)
359            | DataType::LargeList(_)
360            | DataType::ListView(_)
361            | DataType::LargeListView(_)
362            | DataType::FixedSizeList(..)
363            | DataType::Struct(_)
364            | DataType::Map(..)
365            | DataType::Union(..)
366            | DataType::Dictionary(..)
367            | DataType::RunEndEncoded(..) => {
368                return Err(ArrowError::InvalidArgumentError(format!(
369                    "Casting to {data_type:?} is not applicable for primitive Variant types"
370                )));
371            }
372        };
373    Ok(builder)
374}
375
376pub(crate) fn make_variant_to_arrow_row_builder<'a>(
377    metadata: &BinaryViewArray,
378    path: VariantPath<'a>,
379    data_type: Option<&'a DataType>,
380    cast_options: &'a CastOptions,
381    capacity: usize,
382) -> Result<VariantToArrowRowBuilder<'a>> {
383    use VariantToArrowRowBuilder::*;
384
385    let mut builder = match data_type {
386        // If no data type was requested, build an unshredded VariantArray.
387        None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
388            metadata.clone(),
389            capacity,
390        )),
391        Some(DataType::Struct(_)) => {
392            return Err(ArrowError::NotYetImplemented(
393                "Converting unshredded variant objects to arrow structs".to_string(),
394            ));
395        }
396        Some(
397            DataType::List(_)
398            | DataType::LargeList(_)
399            | DataType::ListView(_)
400            | DataType::LargeListView(_)
401            | DataType::FixedSizeList(..),
402        ) => {
403            return Err(ArrowError::NotYetImplemented(
404                "Converting unshredded variant arrays to arrow lists".to_string(),
405            ));
406        }
407        Some(data_type) => {
408            let builder =
409                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
410            Primitive(builder)
411        }
412    };
413
414    // Wrap with path extraction if needed
415    if !path.is_empty() {
416        builder = WithPath(VariantPathRowBuilder {
417            builder: Box::new(builder),
418            path,
419        })
420    };
421
422    Ok(builder)
423}
424
425/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the
426/// result to a nested builder.
427pub(crate) struct VariantPathRowBuilder<'a> {
428    builder: Box<VariantToArrowRowBuilder<'a>>,
429    path: VariantPath<'a>,
430}
431
432impl<'a> VariantPathRowBuilder<'a> {
433    fn append_null(&mut self) -> Result<()> {
434        self.builder.append_null()
435    }
436
437    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
438        if let Some(v) = value.get_path(&self.path) {
439            self.builder.append_value(v)
440        } else {
441            self.builder.append_null()?;
442            Ok(false)
443        }
444    }
445
446    fn finish(self) -> Result<ArrayRef> {
447        self.builder.finish()
448    }
449}
450
/// Defines a row builder struct that wraps an Arrow array builder together with the cast
/// options, plus its `new`, `append_null`, `append_value` and `finish` methods.
///
/// Macro inputs, in order:
/// * `struct Name<'a [, G: Bound]>` - name, lifetime and optional generic of the new struct.
/// * `|capacity [, field: Type]| -> Builder[<T>] { init }` - parameters of `new` (after
///   `cast_options`), the wrapped builder type, and the expression that creates it.
/// * `|value| transform` - expression turning the variant into an `Option` of the value to
///   append.
/// * `type_name: expr` - type description used in the cast error message.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                // Optional extra parameter, declared here so that $init_expr can use it
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match $value_transform {
                    Some(converted) => {
                        self.builder.append_value(converted);
                        Ok(true)
                    }
                    // Safe casting: append null on conversion failure
                    None if self.cast_options.safe => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    // Unsafe casting: return error on conversion failure
                    None => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
                        $type_name,
                        $value
                    ))),
                }
            }

            // `mut` is not needed by every wrapped builder (notably `FakeNullBuilder`, whose
            // `finish` takes `self` by value), so silence the unused-mut warning here.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                Ok(Arc::new(self.builder.finish()))
            }
        }
    }
}
508
509define_variant_to_primitive_builder!(
510    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
511    |capacity| -> B { B::with_capacity(capacity) },
512    |value| value.as_string(),
513    type_name: B::type_name()
514);
515
516define_variant_to_primitive_builder!(
517    struct VariantToBooleanArrowRowBuilder<'a>
518    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
519    |value|  value.as_boolean(),
520    type_name: datatypes::BooleanType::DATA_TYPE
521);
522
523define_variant_to_primitive_builder!(
524    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
525    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
526    |value| T::from_variant(value),
527    type_name: T::DATA_TYPE
528);
529
530define_variant_to_primitive_builder!(
531    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
532    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
533    |value| T::from_variant(value),
534    type_name: T::DATA_TYPE
535);
536
537define_variant_to_primitive_builder!(
538    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
539    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
540        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
541    },
542    |value| T::from_variant(value),
543    type_name: T::DATA_TYPE
544);
545
546define_variant_to_primitive_builder!(
547    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
548    |capacity| -> B { B::with_capacity(capacity) },
549    |value| value.as_u8_slice(),
550    type_name: B::type_name()
551);
552
553/// Builder for converting variant values to arrow Decimal values
554pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
555where
556    T: DecimalType,
557    T::Native: DecimalCast,
558{
559    builder: PrimitiveBuilder<T>,
560    cast_options: &'a CastOptions<'a>,
561    precision: u8,
562    scale: i8,
563}
564
565impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
566where
567    T: DecimalType,
568    T::Native: DecimalCast,
569{
570    fn new(
571        cast_options: &'a CastOptions<'a>,
572        capacity: usize,
573        precision: u8,
574        scale: i8,
575    ) -> Result<Self> {
576        let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
577            .with_precision_and_scale(precision, scale)?;
578        Ok(Self {
579            builder,
580            cast_options,
581            precision,
582            scale,
583        })
584    }
585
586    fn append_null(&mut self) -> Result<()> {
587        self.builder.append_null();
588        Ok(())
589    }
590
591    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
592        if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
593            self.builder.append_value(scaled);
594            Ok(true)
595        } else if self.cast_options.safe {
596            self.builder.append_null();
597            Ok(false)
598        } else {
599            Err(ArrowError::CastError(format!(
600                "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
601                T::PREFIX,
602                self.precision,
603                self.scale,
604                value
605            )))
606        }
607    }
608
609    fn finish(mut self) -> Result<ArrayRef> {
610        Ok(Arc::new(self.builder.finish()))
611    }
612}
613
614/// Builder for converting variant values to FixedSizeBinary(16) for UUIDs
615pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
616    builder: FixedSizeBinaryBuilder,
617    cast_options: &'a CastOptions<'a>,
618}
619
620impl<'a> VariantToUuidArrowRowBuilder<'a> {
621    fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
622        Self {
623            builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
624            cast_options,
625        }
626    }
627
628    fn append_null(&mut self) -> Result<()> {
629        self.builder.append_null();
630        Ok(())
631    }
632
633    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
634        match value.as_uuid() {
635            Some(uuid) => {
636                self.builder
637                    .append_value(uuid.as_bytes())
638                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
639
640                Ok(true)
641            }
642            None if self.cast_options.safe => {
643                self.builder.append_null();
644                Ok(false)
645            }
646            None => Err(ArrowError::CastError(format!(
647                "Failed to extract UUID from variant {value:?}",
648            ))),
649        }
650    }
651
652    fn finish(mut self) -> Result<ArrayRef> {
653        Ok(Arc::new(self.builder.finish()))
654    }
655}
656
657/// Builder for creating VariantArray output (for path extraction without type conversion)
658pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
659    metadata: BinaryViewArray,
660    builder: VariantValueArrayBuilder,
661    nulls: NullBufferBuilder,
662}
663
664impl VariantToBinaryVariantArrowRowBuilder {
665    fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
666        Self {
667            metadata,
668            builder: VariantValueArrayBuilder::new(capacity),
669            nulls: NullBufferBuilder::new(capacity),
670        }
671    }
672}
673
674impl VariantToBinaryVariantArrowRowBuilder {
675    fn append_null(&mut self) -> Result<()> {
676        self.builder.append_null();
677        self.nulls.append_null();
678        Ok(())
679    }
680
681    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
682        self.builder.append_value(value);
683        self.nulls.append_non_null();
684        Ok(true)
685    }
686
687    fn finish(mut self) -> Result<ArrayRef> {
688        let variant_array = VariantArray::from_parts(
689            self.metadata,
690            Some(self.builder.build()?),
691            None, // no typed_value column
692            self.nulls.finish(),
693        );
694
695        Ok(ArrayRef::from(variant_array))
696    }
697}
698
699#[derive(Default)]
700struct FakeNullBuilder {
701    item_count: usize,
702}
703
704impl FakeNullBuilder {
705    fn append_value(&mut self, _: ()) {
706        self.item_count += 1;
707    }
708
709    fn append_null(&mut self) {
710        self.item_count += 1;
711    }
712
713    fn finish(self) -> NullArray {
714        NullArray::new(self.item_count)
715    }
716}
717
718define_variant_to_primitive_builder!(
719    struct VariantToNullArrowRowBuilder<'a>
720    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
721    |value| value.as_null(),
722    type_name: "Null"
723);
724
#[cfg(test)]
mod tests {
    use super::make_primitive_variant_to_arrow_row_builder;
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use std::sync::Arc;

    /// Nested and encoded Arrow types must be rejected with an `InvalidArgumentError` whose
    /// message names the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        // Child fields shared by the nested types below.
        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
        let map_entries_field = Arc::new(Field::new(
            "entries",
            DataType::Struct(Fields::from(vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Float64, true),
            ])),
            true,
        ));
        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false));
        let ree_values_field = Arc::new(Field::new("values", DataType::Utf8, true));

        let non_primitive_types = [
            DataType::List(Arc::clone(&item_field)),
            DataType::LargeList(Arc::clone(&item_field)),
            DataType::ListView(Arc::clone(&item_field)),
            DataType::LargeListView(Arc::clone(&item_field)),
            DataType::FixedSizeList(Arc::clone(&item_field), 2),
            DataType::Struct(Fields::from(vec![Field::new(
                "child",
                DataType::Int32,
                true,
            )])),
            DataType::Map(map_entries_field, false),
            DataType::Union(
                UnionFields::new(vec![1], vec![Field::new("child", DataType::Int32, true)]),
                UnionMode::Dense,
            ),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(run_ends_field, ree_values_field),
        ];

        for data_type in &non_primitive_types {
            let Err(err) = make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1)
            else {
                panic!("non-primitive type {data_type:?} should be rejected");
            };

            let ArrowError::InvalidArgumentError(msg) = &err else {
                panic!("expected InvalidArgumentError, got {err:?}");
            };
            assert!(msg.contains(&format!("{data_type:?}")));
        }
    }
}