1use arrow::array::{
19 ArrayRef, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray, BinaryViewBuilder,
20 BooleanBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder, LargeStringBuilder, NullArray,
21 NullBufferBuilder, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
22};
23use arrow::compute::{CastOptions, DecimalCast};
24use arrow::datatypes::{self, DataType, DecimalType};
25use arrow::error::{ArrowError, Result};
26use parquet_variant::{Variant, VariantPath};
27
28use crate::type_conversion::{
29 PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
30};
31use crate::{VariantArray, VariantValueArrayBuilder};
32
33use arrow_schema::TimeUnit;
34use std::sync::Arc;
35
/// Row-oriented builder that converts [`Variant`] values into an Arrow array of one
/// primitive type.
///
/// Each enum variant wraps the concrete builder for a single target type; the methods on this
/// enum simply forward to whichever builder is wrapped.
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
    Null(VariantToNullArrowRowBuilder<'a>),
    Boolean(VariantToBooleanArrowRowBuilder<'a>),
    // Signed and unsigned integers.
    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
    // Floating point.
    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
    // Decimals; each builder carries its own precision and scale.
    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
    // Timestamps: the `Ntz` variants are used when the target type has no timezone, the
    // plain variants when it has one.
    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampMicroNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
    ),
    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    // `Time` targets `Time64(Microsecond)` and `Date` targets `Date32`.
    Time(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
    Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
    // Emitted as a 16-byte fixed-size binary array.
    Uuid(VariantToUuidArrowRowBuilder<'a>),
    // String-like outputs.
    String(VariantToStringArrowBuilder<'a, StringBuilder>),
    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
    // Binary-like outputs.
    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
}
73
/// Row-oriented builder for converting [`Variant`] values to Arrow, optionally after
/// following a path into each value.
pub(crate) enum VariantToArrowRowBuilder<'a> {
    /// Produces an array of one primitive Arrow type.
    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
    /// Used when no target data type is requested; the output remains a variant array.
    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

    /// Looks up a path in each value before delegating to a boxed inner builder.
    WithPath(VariantPathRowBuilder<'a>),
}
85
86impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
87 pub fn append_null(&mut self) -> Result<()> {
88 use PrimitiveVariantToArrowRowBuilder::*;
89 match self {
90 Null(b) => b.append_null(),
91 Boolean(b) => b.append_null(),
92 Int8(b) => b.append_null(),
93 Int16(b) => b.append_null(),
94 Int32(b) => b.append_null(),
95 Int64(b) => b.append_null(),
96 UInt8(b) => b.append_null(),
97 UInt16(b) => b.append_null(),
98 UInt32(b) => b.append_null(),
99 UInt64(b) => b.append_null(),
100 Float16(b) => b.append_null(),
101 Float32(b) => b.append_null(),
102 Float64(b) => b.append_null(),
103 Decimal32(b) => b.append_null(),
104 Decimal64(b) => b.append_null(),
105 Decimal128(b) => b.append_null(),
106 Decimal256(b) => b.append_null(),
107 TimestampMicro(b) => b.append_null(),
108 TimestampMicroNtz(b) => b.append_null(),
109 TimestampNano(b) => b.append_null(),
110 TimestampNanoNtz(b) => b.append_null(),
111 Time(b) => b.append_null(),
112 Date(b) => b.append_null(),
113 Uuid(b) => b.append_null(),
114 String(b) => b.append_null(),
115 LargeString(b) => b.append_null(),
116 StringView(b) => b.append_null(),
117 Binary(b) => b.append_null(),
118 LargeBinary(b) => b.append_null(),
119 BinaryView(b) => b.append_null(),
120 }
121 }
122
123 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
124 use PrimitiveVariantToArrowRowBuilder::*;
125 match self {
126 Null(b) => b.append_value(value),
127 Boolean(b) => b.append_value(value),
128 Int8(b) => b.append_value(value),
129 Int16(b) => b.append_value(value),
130 Int32(b) => b.append_value(value),
131 Int64(b) => b.append_value(value),
132 UInt8(b) => b.append_value(value),
133 UInt16(b) => b.append_value(value),
134 UInt32(b) => b.append_value(value),
135 UInt64(b) => b.append_value(value),
136 Float16(b) => b.append_value(value),
137 Float32(b) => b.append_value(value),
138 Float64(b) => b.append_value(value),
139 Decimal32(b) => b.append_value(value),
140 Decimal64(b) => b.append_value(value),
141 Decimal128(b) => b.append_value(value),
142 Decimal256(b) => b.append_value(value),
143 TimestampMicro(b) => b.append_value(value),
144 TimestampMicroNtz(b) => b.append_value(value),
145 TimestampNano(b) => b.append_value(value),
146 TimestampNanoNtz(b) => b.append_value(value),
147 Time(b) => b.append_value(value),
148 Date(b) => b.append_value(value),
149 Uuid(b) => b.append_value(value),
150 String(b) => b.append_value(value),
151 LargeString(b) => b.append_value(value),
152 StringView(b) => b.append_value(value),
153 Binary(b) => b.append_value(value),
154 LargeBinary(b) => b.append_value(value),
155 BinaryView(b) => b.append_value(value),
156 }
157 }
158
159 pub fn finish(self) -> Result<ArrayRef> {
160 use PrimitiveVariantToArrowRowBuilder::*;
161 match self {
162 Null(b) => b.finish(),
163 Boolean(b) => b.finish(),
164 Int8(b) => b.finish(),
165 Int16(b) => b.finish(),
166 Int32(b) => b.finish(),
167 Int64(b) => b.finish(),
168 UInt8(b) => b.finish(),
169 UInt16(b) => b.finish(),
170 UInt32(b) => b.finish(),
171 UInt64(b) => b.finish(),
172 Float16(b) => b.finish(),
173 Float32(b) => b.finish(),
174 Float64(b) => b.finish(),
175 Decimal32(b) => b.finish(),
176 Decimal64(b) => b.finish(),
177 Decimal128(b) => b.finish(),
178 Decimal256(b) => b.finish(),
179 TimestampMicro(b) => b.finish(),
180 TimestampMicroNtz(b) => b.finish(),
181 TimestampNano(b) => b.finish(),
182 TimestampNanoNtz(b) => b.finish(),
183 Time(b) => b.finish(),
184 Date(b) => b.finish(),
185 Uuid(b) => b.finish(),
186 String(b) => b.finish(),
187 LargeString(b) => b.finish(),
188 StringView(b) => b.finish(),
189 Binary(b) => b.finish(),
190 LargeBinary(b) => b.finish(),
191 BinaryView(b) => b.finish(),
192 }
193 }
194}
195
196impl<'a> VariantToArrowRowBuilder<'a> {
197 pub fn append_null(&mut self) -> Result<()> {
198 use VariantToArrowRowBuilder::*;
199 match self {
200 Primitive(b) => b.append_null(),
201 BinaryVariant(b) => b.append_null(),
202 WithPath(path_builder) => path_builder.append_null(),
203 }
204 }
205
206 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
207 use VariantToArrowRowBuilder::*;
208 match self {
209 Primitive(b) => b.append_value(&value),
210 BinaryVariant(b) => b.append_value(value),
211 WithPath(path_builder) => path_builder.append_value(value),
212 }
213 }
214
215 pub fn finish(self) -> Result<ArrayRef> {
216 use VariantToArrowRowBuilder::*;
217 match self {
218 Primitive(b) => b.finish(),
219 BinaryVariant(b) => b.finish(),
220 WithPath(path_builder) => path_builder.finish(),
221 }
222 }
223}
224
225pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
227 data_type: &'a DataType,
228 cast_options: &'a CastOptions,
229 capacity: usize,
230) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
231 use PrimitiveVariantToArrowRowBuilder::*;
232
233 let builder =
234 match data_type {
235 DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
236 DataType::Boolean => {
237 Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
238 }
239 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
240 cast_options,
241 capacity,
242 )),
243 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
244 cast_options,
245 capacity,
246 )),
247 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
248 cast_options,
249 capacity,
250 )),
251 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
252 cast_options,
253 capacity,
254 )),
255 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
256 cast_options,
257 capacity,
258 )),
259 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
260 cast_options,
261 capacity,
262 )),
263 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
264 cast_options,
265 capacity,
266 )),
267 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
268 cast_options,
269 capacity,
270 )),
271 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
272 cast_options,
273 capacity,
274 )),
275 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
276 cast_options,
277 capacity,
278 )),
279 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
280 cast_options,
281 capacity,
282 )),
283 DataType::Decimal32(precision, scale) => Decimal32(
284 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
285 ),
286 DataType::Decimal64(precision, scale) => Decimal64(
287 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
288 ),
289 DataType::Decimal128(precision, scale) => Decimal128(
290 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
291 ),
292 DataType::Decimal256(precision, scale) => Decimal256(
293 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
294 ),
295 DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
296 cast_options,
297 capacity,
298 )),
299 DataType::Date64 | DataType::Time32(_) => {
300 return Err(ArrowError::NotYetImplemented(format!(
301 "DataType {data_type:?} not yet implemented"
302 )));
303 }
304 DataType::Time64(TimeUnit::Microsecond) => Time(
305 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
306 ),
307 DataType::Time64(_) => {
308 return Err(ArrowError::NotYetImplemented(format!(
309 "DataType {data_type:?} not yet implemented"
310 )));
311 }
312 DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
313 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
314 ),
315 DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
316 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
317 ),
318 DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
319 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
320 ),
321 DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
322 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
323 ),
324 DataType::Timestamp(..) => {
325 return Err(ArrowError::NotYetImplemented(format!(
326 "DataType {data_type:?} not yet implemented"
327 )));
328 }
329 DataType::Duration(_) | DataType::Interval(_) => {
330 return Err(ArrowError::InvalidArgumentError(
331 "Casting Variant to duration/interval types is not supported. \
332 The Variant format does not define duration/interval types."
333 .to_string(),
334 ));
335 }
336 DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
337 DataType::LargeBinary => {
338 LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
339 }
340 DataType::BinaryView => {
341 BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
342 }
343 DataType::FixedSizeBinary(16) => {
344 Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
345 }
346 DataType::FixedSizeBinary(_) => {
347 return Err(ArrowError::NotYetImplemented(format!(
348 "DataType {data_type:?} not yet implemented"
349 )));
350 }
351 DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
352 DataType::LargeUtf8 => {
353 LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
354 }
355 DataType::Utf8View => {
356 StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
357 }
358 DataType::List(_)
359 | DataType::LargeList(_)
360 | DataType::ListView(_)
361 | DataType::LargeListView(_)
362 | DataType::FixedSizeList(..)
363 | DataType::Struct(_)
364 | DataType::Map(..)
365 | DataType::Union(..)
366 | DataType::Dictionary(..)
367 | DataType::RunEndEncoded(..) => {
368 return Err(ArrowError::InvalidArgumentError(format!(
369 "Casting to {data_type:?} is not applicable for primitive Variant types"
370 )));
371 }
372 };
373 Ok(builder)
374}
375
376pub(crate) fn make_variant_to_arrow_row_builder<'a>(
377 metadata: &BinaryViewArray,
378 path: VariantPath<'a>,
379 data_type: Option<&'a DataType>,
380 cast_options: &'a CastOptions,
381 capacity: usize,
382) -> Result<VariantToArrowRowBuilder<'a>> {
383 use VariantToArrowRowBuilder::*;
384
385 let mut builder = match data_type {
386 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
388 metadata.clone(),
389 capacity,
390 )),
391 Some(DataType::Struct(_)) => {
392 return Err(ArrowError::NotYetImplemented(
393 "Converting unshredded variant objects to arrow structs".to_string(),
394 ));
395 }
396 Some(
397 DataType::List(_)
398 | DataType::LargeList(_)
399 | DataType::ListView(_)
400 | DataType::LargeListView(_)
401 | DataType::FixedSizeList(..),
402 ) => {
403 return Err(ArrowError::NotYetImplemented(
404 "Converting unshredded variant arrays to arrow lists".to_string(),
405 ));
406 }
407 Some(data_type) => {
408 let builder =
409 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
410 Primitive(builder)
411 }
412 };
413
414 if !path.is_empty() {
416 builder = WithPath(VariantPathRowBuilder {
417 builder: Box::new(builder),
418 path,
419 })
420 };
421
422 Ok(builder)
423}
424
425pub(crate) struct VariantPathRowBuilder<'a> {
428 builder: Box<VariantToArrowRowBuilder<'a>>,
429 path: VariantPath<'a>,
430}
431
432impl<'a> VariantPathRowBuilder<'a> {
433 fn append_null(&mut self) -> Result<()> {
434 self.builder.append_null()
435 }
436
437 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
438 if let Some(v) = value.get_path(&self.path) {
439 self.builder.append_value(v)
440 } else {
441 self.builder.append_null()?;
442 Ok(false)
443 }
444 }
445
446 fn finish(self) -> Result<ArrayRef> {
447 self.builder.finish()
448 }
449}
450
// Generates a row builder struct around an Arrow array builder.
//
// Inputs, in order:
//   * the struct name, its lifetime and an optional bounded generic parameter;
//   * a closure-like `|capacity [, extra: Type]| -> BuilderType { init }` describing the
//     constructor arguments, the wrapped builder type and how to create it;
//   * a closure-like `|value| expr` yielding `Some(native)` when the variant can be
//     converted and `None` otherwise;
//   * `type_name`, used only in the cast error message.
//
// The generated type exposes `new`, `append_null`, `append_value` and `finish`.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            // Returns `Ok(true)` when a value was stored and `Ok(false)` when a null was
            // stored in its place. Failed conversions are an error unless
            // `cast_options.safe` is set.
            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match $value_transform {
                    Some(converted) => {
                        self.builder.append_value(converted);
                        Ok(true)
                    }
                    None if self.cast_options.safe => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    None => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
                        $type_name,
                        $value
                    ))),
                }
            }

            // `mut` is unused for builders whose `finish` takes `self` by value.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                Ok(Arc::new(self.builder.finish()))
            }
        }
    }
}
508
// Row builder for string-like outputs (`Utf8`, `LargeUtf8`, `Utf8View`). Values are read
// with `as_string`, and the wrapped builder's own `type_name()` is reported in cast errors.
define_variant_to_primitive_builder!(
    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_string(),
    type_name: B::type_name()
);
515
// Row builder for `Boolean` outputs; values are read with `as_boolean`.
define_variant_to_primitive_builder!(
    struct VariantToBooleanArrowRowBuilder<'a>
    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
    |value| value.as_boolean(),
    type_name: datatypes::BooleanType::DATA_TYPE
);
522
// Generic row builder for any Arrow primitive type `T` implementing `PrimitiveFromVariant`
// (used for the integer, float, date and time targets). Conversion is delegated to
// `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
529
// Row builder used for timestamp targets that carry no timezone. Conversion is delegated to
// the `TimestampFromVariant<true>` implementation of `T`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
536
// Row builder used for timestamp targets that carry a timezone. The extra `tz` constructor
// argument is applied to the wrapped builder; conversion is delegated to the
// `TimestampFromVariant<false>` implementation of `T`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
    },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
545
// Row builder for binary-like outputs (`Binary`, `LargeBinary`, `BinaryView`). Values are
// read with `as_u8_slice`, and the wrapped builder's own `type_name()` is reported in cast
// errors.
define_variant_to_primitive_builder!(
    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_u8_slice(),
    type_name: B::type_name()
);
552
553pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
555where
556 T: DecimalType,
557 T::Native: DecimalCast,
558{
559 builder: PrimitiveBuilder<T>,
560 cast_options: &'a CastOptions<'a>,
561 precision: u8,
562 scale: i8,
563}
564
565impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
566where
567 T: DecimalType,
568 T::Native: DecimalCast,
569{
570 fn new(
571 cast_options: &'a CastOptions<'a>,
572 capacity: usize,
573 precision: u8,
574 scale: i8,
575 ) -> Result<Self> {
576 let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
577 .with_precision_and_scale(precision, scale)?;
578 Ok(Self {
579 builder,
580 cast_options,
581 precision,
582 scale,
583 })
584 }
585
586 fn append_null(&mut self) -> Result<()> {
587 self.builder.append_null();
588 Ok(())
589 }
590
591 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
592 if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
593 self.builder.append_value(scaled);
594 Ok(true)
595 } else if self.cast_options.safe {
596 self.builder.append_null();
597 Ok(false)
598 } else {
599 Err(ArrowError::CastError(format!(
600 "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
601 T::PREFIX,
602 self.precision,
603 self.scale,
604 value
605 )))
606 }
607 }
608
609 fn finish(mut self) -> Result<ArrayRef> {
610 Ok(Arc::new(self.builder.finish()))
611 }
612}
613
614pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
616 builder: FixedSizeBinaryBuilder,
617 cast_options: &'a CastOptions<'a>,
618}
619
620impl<'a> VariantToUuidArrowRowBuilder<'a> {
621 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
622 Self {
623 builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
624 cast_options,
625 }
626 }
627
628 fn append_null(&mut self) -> Result<()> {
629 self.builder.append_null();
630 Ok(())
631 }
632
633 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
634 match value.as_uuid() {
635 Some(uuid) => {
636 self.builder
637 .append_value(uuid.as_bytes())
638 .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
639
640 Ok(true)
641 }
642 None if self.cast_options.safe => {
643 self.builder.append_null();
644 Ok(false)
645 }
646 None => Err(ArrowError::CastError(format!(
647 "Failed to extract UUID from variant {value:?}",
648 ))),
649 }
650 }
651
652 fn finish(mut self) -> Result<ArrayRef> {
653 Ok(Arc::new(self.builder.finish()))
654 }
655}
656
657pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
659 metadata: BinaryViewArray,
660 builder: VariantValueArrayBuilder,
661 nulls: NullBufferBuilder,
662}
663
664impl VariantToBinaryVariantArrowRowBuilder {
665 fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
666 Self {
667 metadata,
668 builder: VariantValueArrayBuilder::new(capacity),
669 nulls: NullBufferBuilder::new(capacity),
670 }
671 }
672}
673
674impl VariantToBinaryVariantArrowRowBuilder {
675 fn append_null(&mut self) -> Result<()> {
676 self.builder.append_null();
677 self.nulls.append_null();
678 Ok(())
679 }
680
681 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
682 self.builder.append_value(value);
683 self.nulls.append_non_null();
684 Ok(true)
685 }
686
687 fn finish(mut self) -> Result<ArrayRef> {
688 let variant_array = VariantArray::from_parts(
689 self.metadata,
690 Some(self.builder.build()?),
691 None, self.nulls.finish(),
693 );
694
695 Ok(ArrayRef::from(variant_array))
696 }
697}
698
699#[derive(Default)]
700struct FakeNullBuilder {
701 item_count: usize,
702}
703
704impl FakeNullBuilder {
705 fn append_value(&mut self, _: ()) {
706 self.item_count += 1;
707 }
708
709 fn append_null(&mut self) {
710 self.item_count += 1;
711 }
712
713 fn finish(self) -> NullArray {
714 NullArray::new(self.item_count)
715 }
716}
717
// Row builder for `Null` outputs. The capacity is ignored because `FakeNullBuilder` only
// counts rows; values are checked with `as_null`.
define_variant_to_primitive_builder!(
    struct VariantToNullArrowRowBuilder<'a>
    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
    |value| value.as_null(),
    type_name: "Null"
);
724
#[cfg(test)]
mod tests {
    use super::make_primitive_variant_to_arrow_row_builder;
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use std::sync::Arc;

    /// Nested, dictionary, union and run-end-encoded types must be refused by the primitive
    /// builder factory with an `InvalidArgumentError` that names the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        let item = Arc::new(Field::new("item", DataType::Int32, true));
        let map_entries = Arc::new(Field::new(
            "entries",
            DataType::Struct(Fields::from(vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Float64, true),
            ])),
            true,
        ));

        let rejected = [
            DataType::List(Arc::clone(&item)),
            DataType::LargeList(Arc::clone(&item)),
            DataType::ListView(Arc::clone(&item)),
            DataType::LargeListView(Arc::clone(&item)),
            DataType::FixedSizeList(item, 2),
            DataType::Struct(Fields::from(vec![Field::new(
                "child",
                DataType::Int32,
                true,
            )])),
            DataType::Map(map_entries, false),
            DataType::Union(
                UnionFields::new(vec![1], vec![Field::new("child", DataType::Int32, true)]),
                UnionMode::Dense,
            ),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::Int32, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            ),
        ];

        for data_type in &rejected {
            // The builder type has no `Debug` impl, so `expect_err` is not available here.
            let err = make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1)
                .err()
                .unwrap_or_else(|| panic!("non-primitive type {data_type:?} should be rejected"));

            match &err {
                ArrowError::InvalidArgumentError(msg) => {
                    assert!(msg.contains(&format!("{data_type:?}")));
                }
                other => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }
}