1use arrow::array::{
19 ArrayRef, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray, BinaryViewBuilder,
20 BooleanBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder, LargeStringBuilder, NullArray,
21 NullBufferBuilder, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
22};
23use arrow::compute::{CastOptions, DecimalCast};
24use arrow::datatypes::{self, DataType, DecimalType};
25use arrow::error::{ArrowError, Result};
26use parquet_variant::{Variant, VariantPath};
27
28use crate::type_conversion::{
29 PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
30};
31use crate::{VariantArray, VariantValueArrayBuilder};
32
33use arrow_schema::TimeUnit;
34use std::sync::Arc;
35
/// Row-at-a-time builder that converts variant values into a single primitive Arrow array.
///
/// There is one enum variant per supported target type. Each wraps the concrete row builder
/// for that type, and the methods on this enum simply forward to the wrapped builder.
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
    // Null and boolean targets.
    Null(VariantToNullArrowRowBuilder<'a>),
    Boolean(VariantToBooleanArrowRowBuilder<'a>),
    // Signed integer targets.
    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
    // Unsigned integer targets.
    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
    // Floating point targets.
    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
    // Decimal targets; precision and scale are held by the wrapped builder.
    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
    // Timestamp targets. The `Ntz` variants are selected for `DataType::Timestamp(_, None)`,
    // the others for timestamps that carry a time zone.
    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMilliNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
    ),
    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampMicroNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
    ),
    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    // Time-of-day targets.
    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
    // Date targets.
    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
    // UUID target, selected for `DataType::FixedSizeBinary(16)`.
    Uuid(VariantToUuidArrowRowBuilder<'a>),
    // String targets, one per Arrow string builder flavour.
    String(VariantToStringArrowBuilder<'a, StringBuilder>),
    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
    // Binary targets, one per Arrow binary builder flavour.
    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
}
83
/// Top-level row builder for converting variant values into an Arrow array.
///
/// All three variants expose the same `append_null` / `append_value` / `finish` operations
/// through the methods on this enum.
pub(crate) enum VariantToArrowRowBuilder<'a> {
    /// Casts each value to a primitive Arrow type.
    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
    /// Keeps each value as a binary variant; chosen when no target data type is requested.
    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

    /// Resolves a path inside each value first, then hands the result to a boxed inner builder.
    WithPath(VariantPathRowBuilder<'a>),
}
95
96impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
97 pub fn append_null(&mut self) -> Result<()> {
98 use PrimitiveVariantToArrowRowBuilder::*;
99 match self {
100 Null(b) => b.append_null(),
101 Boolean(b) => b.append_null(),
102 Int8(b) => b.append_null(),
103 Int16(b) => b.append_null(),
104 Int32(b) => b.append_null(),
105 Int64(b) => b.append_null(),
106 UInt8(b) => b.append_null(),
107 UInt16(b) => b.append_null(),
108 UInt32(b) => b.append_null(),
109 UInt64(b) => b.append_null(),
110 Float16(b) => b.append_null(),
111 Float32(b) => b.append_null(),
112 Float64(b) => b.append_null(),
113 Decimal32(b) => b.append_null(),
114 Decimal64(b) => b.append_null(),
115 Decimal128(b) => b.append_null(),
116 Decimal256(b) => b.append_null(),
117 TimestampSecond(b) => b.append_null(),
118 TimestampSecondNtz(b) => b.append_null(),
119 TimestampMilli(b) => b.append_null(),
120 TimestampMilliNtz(b) => b.append_null(),
121 TimestampMicro(b) => b.append_null(),
122 TimestampMicroNtz(b) => b.append_null(),
123 TimestampNano(b) => b.append_null(),
124 TimestampNanoNtz(b) => b.append_null(),
125 Time32Second(b) => b.append_null(),
126 Time32Milli(b) => b.append_null(),
127 Time64Micro(b) => b.append_null(),
128 Time64Nano(b) => b.append_null(),
129 Date32(b) => b.append_null(),
130 Date64(b) => b.append_null(),
131 Uuid(b) => b.append_null(),
132 String(b) => b.append_null(),
133 LargeString(b) => b.append_null(),
134 StringView(b) => b.append_null(),
135 Binary(b) => b.append_null(),
136 LargeBinary(b) => b.append_null(),
137 BinaryView(b) => b.append_null(),
138 }
139 }
140
141 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
142 use PrimitiveVariantToArrowRowBuilder::*;
143 match self {
144 Null(b) => b.append_value(value),
145 Boolean(b) => b.append_value(value),
146 Int8(b) => b.append_value(value),
147 Int16(b) => b.append_value(value),
148 Int32(b) => b.append_value(value),
149 Int64(b) => b.append_value(value),
150 UInt8(b) => b.append_value(value),
151 UInt16(b) => b.append_value(value),
152 UInt32(b) => b.append_value(value),
153 UInt64(b) => b.append_value(value),
154 Float16(b) => b.append_value(value),
155 Float32(b) => b.append_value(value),
156 Float64(b) => b.append_value(value),
157 Decimal32(b) => b.append_value(value),
158 Decimal64(b) => b.append_value(value),
159 Decimal128(b) => b.append_value(value),
160 Decimal256(b) => b.append_value(value),
161 TimestampSecond(b) => b.append_value(value),
162 TimestampSecondNtz(b) => b.append_value(value),
163 TimestampMilli(b) => b.append_value(value),
164 TimestampMilliNtz(b) => b.append_value(value),
165 TimestampMicro(b) => b.append_value(value),
166 TimestampMicroNtz(b) => b.append_value(value),
167 TimestampNano(b) => b.append_value(value),
168 TimestampNanoNtz(b) => b.append_value(value),
169 Time32Second(b) => b.append_value(value),
170 Time32Milli(b) => b.append_value(value),
171 Time64Micro(b) => b.append_value(value),
172 Time64Nano(b) => b.append_value(value),
173 Date32(b) => b.append_value(value),
174 Date64(b) => b.append_value(value),
175 Uuid(b) => b.append_value(value),
176 String(b) => b.append_value(value),
177 LargeString(b) => b.append_value(value),
178 StringView(b) => b.append_value(value),
179 Binary(b) => b.append_value(value),
180 LargeBinary(b) => b.append_value(value),
181 BinaryView(b) => b.append_value(value),
182 }
183 }
184
185 pub fn finish(self) -> Result<ArrayRef> {
186 use PrimitiveVariantToArrowRowBuilder::*;
187 match self {
188 Null(b) => b.finish(),
189 Boolean(b) => b.finish(),
190 Int8(b) => b.finish(),
191 Int16(b) => b.finish(),
192 Int32(b) => b.finish(),
193 Int64(b) => b.finish(),
194 UInt8(b) => b.finish(),
195 UInt16(b) => b.finish(),
196 UInt32(b) => b.finish(),
197 UInt64(b) => b.finish(),
198 Float16(b) => b.finish(),
199 Float32(b) => b.finish(),
200 Float64(b) => b.finish(),
201 Decimal32(b) => b.finish(),
202 Decimal64(b) => b.finish(),
203 Decimal128(b) => b.finish(),
204 Decimal256(b) => b.finish(),
205 TimestampSecond(b) => b.finish(),
206 TimestampSecondNtz(b) => b.finish(),
207 TimestampMilli(b) => b.finish(),
208 TimestampMilliNtz(b) => b.finish(),
209 TimestampMicro(b) => b.finish(),
210 TimestampMicroNtz(b) => b.finish(),
211 TimestampNano(b) => b.finish(),
212 TimestampNanoNtz(b) => b.finish(),
213 Time32Second(b) => b.finish(),
214 Time32Milli(b) => b.finish(),
215 Time64Micro(b) => b.finish(),
216 Time64Nano(b) => b.finish(),
217 Date32(b) => b.finish(),
218 Date64(b) => b.finish(),
219 Uuid(b) => b.finish(),
220 String(b) => b.finish(),
221 LargeString(b) => b.finish(),
222 StringView(b) => b.finish(),
223 Binary(b) => b.finish(),
224 LargeBinary(b) => b.finish(),
225 BinaryView(b) => b.finish(),
226 }
227 }
228}
229
230impl<'a> VariantToArrowRowBuilder<'a> {
231 pub fn append_null(&mut self) -> Result<()> {
232 use VariantToArrowRowBuilder::*;
233 match self {
234 Primitive(b) => b.append_null(),
235 BinaryVariant(b) => b.append_null(),
236 WithPath(path_builder) => path_builder.append_null(),
237 }
238 }
239
240 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
241 use VariantToArrowRowBuilder::*;
242 match self {
243 Primitive(b) => b.append_value(&value),
244 BinaryVariant(b) => b.append_value(value),
245 WithPath(path_builder) => path_builder.append_value(value),
246 }
247 }
248
249 pub fn finish(self) -> Result<ArrayRef> {
250 use VariantToArrowRowBuilder::*;
251 match self {
252 Primitive(b) => b.finish(),
253 BinaryVariant(b) => b.finish(),
254 WithPath(path_builder) => path_builder.finish(),
255 }
256 }
257}
258
259pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
261 data_type: &'a DataType,
262 cast_options: &'a CastOptions,
263 capacity: usize,
264) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
265 use PrimitiveVariantToArrowRowBuilder::*;
266
267 let builder =
268 match data_type {
269 DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
270 DataType::Boolean => {
271 Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
272 }
273 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
274 cast_options,
275 capacity,
276 )),
277 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
278 cast_options,
279 capacity,
280 )),
281 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
282 cast_options,
283 capacity,
284 )),
285 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
286 cast_options,
287 capacity,
288 )),
289 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
290 cast_options,
291 capacity,
292 )),
293 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
294 cast_options,
295 capacity,
296 )),
297 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
298 cast_options,
299 capacity,
300 )),
301 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
302 cast_options,
303 capacity,
304 )),
305 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
306 cast_options,
307 capacity,
308 )),
309 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
310 cast_options,
311 capacity,
312 )),
313 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
314 cast_options,
315 capacity,
316 )),
317 DataType::Decimal32(precision, scale) => Decimal32(
318 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
319 ),
320 DataType::Decimal64(precision, scale) => Decimal64(
321 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
322 ),
323 DataType::Decimal128(precision, scale) => Decimal128(
324 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
325 ),
326 DataType::Decimal256(precision, scale) => Decimal256(
327 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
328 ),
329 DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
330 cast_options,
331 capacity,
332 )),
333 DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
334 cast_options,
335 capacity,
336 )),
337 DataType::Time32(TimeUnit::Second) => Time32Second(
338 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
339 ),
340 DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
341 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
342 ),
343 DataType::Time32(t) => {
344 return Err(ArrowError::InvalidArgumentError(format!(
345 "The unit for Time32 must be second/millisecond, received {t:?}"
346 )));
347 }
348 DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
349 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
350 ),
351 DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
352 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
353 ),
354 DataType::Time64(t) => {
355 return Err(ArrowError::InvalidArgumentError(format!(
356 "The unit for Time64 must be micro/nano seconds, received {t:?}"
357 )));
358 }
359 DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
360 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
361 ),
362 DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
363 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
364 ),
365 DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
366 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
367 ),
368 DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
369 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
370 ),
371 DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
372 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
373 ),
374 DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
375 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
376 ),
377 DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
378 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
379 ),
380 DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
381 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
382 ),
383 DataType::Duration(_) | DataType::Interval(_) => {
384 return Err(ArrowError::InvalidArgumentError(
385 "Casting Variant to duration/interval types is not supported. \
386 The Variant format does not define duration/interval types."
387 .to_string(),
388 ));
389 }
390 DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
391 DataType::LargeBinary => {
392 LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
393 }
394 DataType::BinaryView => {
395 BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
396 }
397 DataType::FixedSizeBinary(16) => {
398 Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
399 }
400 DataType::FixedSizeBinary(_) => {
401 return Err(ArrowError::NotYetImplemented(format!(
402 "DataType {data_type:?} not yet implemented"
403 )));
404 }
405 DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
406 DataType::LargeUtf8 => {
407 LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
408 }
409 DataType::Utf8View => {
410 StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
411 }
412 DataType::List(_)
413 | DataType::LargeList(_)
414 | DataType::ListView(_)
415 | DataType::LargeListView(_)
416 | DataType::FixedSizeList(..)
417 | DataType::Struct(_)
418 | DataType::Map(..)
419 | DataType::Union(..)
420 | DataType::Dictionary(..)
421 | DataType::RunEndEncoded(..) => {
422 return Err(ArrowError::InvalidArgumentError(format!(
423 "Casting to {data_type:?} is not applicable for primitive Variant types"
424 )));
425 }
426 };
427 Ok(builder)
428}
429
430pub(crate) fn make_variant_to_arrow_row_builder<'a>(
431 metadata: &BinaryViewArray,
432 path: VariantPath<'a>,
433 data_type: Option<&'a DataType>,
434 cast_options: &'a CastOptions,
435 capacity: usize,
436) -> Result<VariantToArrowRowBuilder<'a>> {
437 use VariantToArrowRowBuilder::*;
438
439 let mut builder = match data_type {
440 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
442 metadata.clone(),
443 capacity,
444 )),
445 Some(DataType::Struct(_)) => {
446 return Err(ArrowError::NotYetImplemented(
447 "Converting unshredded variant objects to arrow structs".to_string(),
448 ));
449 }
450 Some(
451 DataType::List(_)
452 | DataType::LargeList(_)
453 | DataType::ListView(_)
454 | DataType::LargeListView(_)
455 | DataType::FixedSizeList(..),
456 ) => {
457 return Err(ArrowError::NotYetImplemented(
458 "Converting unshredded variant arrays to arrow lists".to_string(),
459 ));
460 }
461 Some(data_type) => {
462 let builder =
463 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
464 Primitive(builder)
465 }
466 };
467
468 if !path.is_empty() {
470 builder = WithPath(VariantPathRowBuilder {
471 builder: Box::new(builder),
472 path,
473 })
474 };
475
476 Ok(builder)
477}
478
/// Row builder that resolves a path inside each variant before delegating to another builder.
pub(crate) struct VariantPathRowBuilder<'a> {
    /// Builder that receives the value found at `path` (or a null when nothing is found).
    /// Boxed because `VariantToArrowRowBuilder` contains this struct in turn.
    builder: Box<VariantToArrowRowBuilder<'a>>,
    /// Path looked up in every appended value.
    path: VariantPath<'a>,
}
485
486impl<'a> VariantPathRowBuilder<'a> {
487 fn append_null(&mut self) -> Result<()> {
488 self.builder.append_null()
489 }
490
491 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
492 if let Some(v) = value.get_path(&self.path) {
493 self.builder.append_value(v)
494 } else {
495 self.builder.append_null()?;
496 Ok(false)
497 }
498 }
499
500 fn finish(self) -> Result<ArrayRef> {
501 self.builder.finish()
502 }
503}
504
// Generates a row builder struct plus its `new` / `append_null` / `append_value` / `finish`
// methods.
//
// Invocation shape:
//   struct Name<'lifetime [, Generic: Bound]>
//   |capacity_param [, extra_field: Type]| -> BuilderType[<ArrayType>] { builder init expr },
//   |value| expression yielding `Option<_>` for the appended value,
//   type_name: expression printed in the cast error message
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            // Underlying builder that accumulates the output rows.
            builder: $builder_name $(<$array_type>)?,
            // Consulted on conversion failure: `safe` selects "append null" over "return error".
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            // Creates the builder; the capacity parameter and the optional extra field are
            // only visible to `$init_expr`.
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            // Appends a null row.
            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            // Appends the converted value and returns `Ok(true)`. When the conversion yields
            // `None`, either fails with a `CastError` (`safe == false`) or appends a null and
            // returns `Ok(false)`.
            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                if let Some(v) = $value_transform {
                    self.builder.append_value(v);
                    Ok(true)
                } else {
                    if !self.cast_options.safe {
                        // NOTE(review): the path in this message is a fixed literal, not the
                        // path actually being extracted.
                        return Err(ArrowError::CastError(format!(
                            "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
                            $type_name,
                            $value
                        )));
                    }
                    self.builder.append_null();
                    Ok(false)
                }
            }

            // `mut` goes unused when the builder's `finish` takes `self` by value, as
            // `FakeNullBuilder::finish` does, hence the lint allowance.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                Ok(Arc::new(self.builder.finish()))
            }
        }
    }
}
562
// Row builder for string targets, generic over the Arrow string builder `B`.
// A value is appended when `as_string()` yields `Some`; the error message names
// the type via `B::type_name()`.
define_variant_to_primitive_builder!(
    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_string(),
    type_name: B::type_name()
);
569
// Row builder for boolean targets, backed by `BooleanBuilder`.
// A value is appended when `as_boolean()` yields `Some`.
define_variant_to_primitive_builder!(
    struct VariantToBooleanArrowRowBuilder<'a>
    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
    |value| value.as_boolean(),
    type_name: datatypes::BooleanType::DATA_TYPE
);
576
// Row builder for any Arrow primitive type `T` that implements `PrimitiveFromVariant`.
// Conversion is delegated to `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
583
// Row builder for timestamp targets without a time zone (the `Ntz` enum variants).
// Requires `T: TimestampFromVariant<true>` and converts with `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
590
// Row builder for timestamp targets that carry a time zone.
// Requires `T: TimestampFromVariant<false>`; the extra `tz` constructor argument is applied
// to the underlying builder through `with_timezone_opt`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
    },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
599
// Row builder for binary targets, generic over the Arrow binary builder `B`.
// A value is appended when `as_u8_slice()` yields `Some`; the error message names
// the type via `B::type_name()`.
define_variant_to_primitive_builder!(
    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_u8_slice(),
    type_name: B::type_name()
);
606
/// Row builder that casts variant values to an Arrow decimal array of type `T`.
pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
where
    T: DecimalType,
    T::Native: DecimalCast,
{
    /// Underlying Arrow builder; its precision and scale are set when this struct is created.
    builder: PrimitiveBuilder<T>,
    /// Consulted on conversion failure: `safe` selects "append null" over "return error".
    cast_options: &'a CastOptions<'a>,
    /// Target precision, passed to `variant_to_unscaled_decimal` for every value.
    precision: u8,
    /// Target scale, passed to `variant_to_unscaled_decimal` for every value.
    scale: i8,
}
618
619impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
620where
621 T: DecimalType,
622 T::Native: DecimalCast,
623{
624 fn new(
625 cast_options: &'a CastOptions<'a>,
626 capacity: usize,
627 precision: u8,
628 scale: i8,
629 ) -> Result<Self> {
630 let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
631 .with_precision_and_scale(precision, scale)?;
632 Ok(Self {
633 builder,
634 cast_options,
635 precision,
636 scale,
637 })
638 }
639
640 fn append_null(&mut self) -> Result<()> {
641 self.builder.append_null();
642 Ok(())
643 }
644
645 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
646 if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
647 self.builder.append_value(scaled);
648 Ok(true)
649 } else if self.cast_options.safe {
650 self.builder.append_null();
651 Ok(false)
652 } else {
653 Err(ArrowError::CastError(format!(
654 "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
655 T::PREFIX,
656 self.precision,
657 self.scale,
658 value
659 )))
660 }
661 }
662
663 fn finish(mut self) -> Result<ArrayRef> {
664 Ok(Arc::new(self.builder.finish()))
665 }
666}
667
/// Row builder that stores UUID variants in a fixed-size binary array.
pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
    /// Underlying Arrow builder; created with a value width of 16 bytes.
    builder: FixedSizeBinaryBuilder,
    /// Consulted on conversion failure: `safe` selects "append null" over "return error".
    cast_options: &'a CastOptions<'a>,
}
673
674impl<'a> VariantToUuidArrowRowBuilder<'a> {
675 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
676 Self {
677 builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
678 cast_options,
679 }
680 }
681
682 fn append_null(&mut self) -> Result<()> {
683 self.builder.append_null();
684 Ok(())
685 }
686
687 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
688 match value.as_uuid() {
689 Some(uuid) => {
690 self.builder
691 .append_value(uuid.as_bytes())
692 .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
693
694 Ok(true)
695 }
696 None if self.cast_options.safe => {
697 self.builder.append_null();
698 Ok(false)
699 }
700 None => Err(ArrowError::CastError(format!(
701 "Failed to extract UUID from variant {value:?}",
702 ))),
703 }
704 }
705
706 fn finish(mut self) -> Result<ArrayRef> {
707 Ok(Arc::new(self.builder.finish()))
708 }
709}
710
/// Row builder that keeps values as binary variants and produces a `VariantArray`.
pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
    /// Metadata column handed unchanged to the resulting `VariantArray`.
    metadata: BinaryViewArray,
    /// Accumulates the variant value for each row.
    builder: VariantValueArrayBuilder,
    /// Tracks which rows are null; kept in step with `builder` by the append methods.
    nulls: NullBufferBuilder,
}
717
718impl VariantToBinaryVariantArrowRowBuilder {
719 fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
720 Self {
721 metadata,
722 builder: VariantValueArrayBuilder::new(capacity),
723 nulls: NullBufferBuilder::new(capacity),
724 }
725 }
726}
727
728impl VariantToBinaryVariantArrowRowBuilder {
729 fn append_null(&mut self) -> Result<()> {
730 self.builder.append_null();
731 self.nulls.append_null();
732 Ok(())
733 }
734
735 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
736 self.builder.append_value(value);
737 self.nulls.append_non_null();
738 Ok(true)
739 }
740
741 fn finish(mut self) -> Result<ArrayRef> {
742 let variant_array = VariantArray::from_parts(
743 self.metadata,
744 Some(self.builder.build()?),
745 None, self.nulls.finish(),
747 );
748
749 Ok(ArrayRef::from(variant_array))
750 }
751}
752
/// Minimal stand-in for an Arrow builder that produces a `NullArray`.
///
/// It only counts rows, because both appended values and appended nulls add one row.
#[derive(Default)]
struct FakeNullBuilder {
    /// Number of rows appended so far.
    item_count: usize,
}
757
758impl FakeNullBuilder {
759 fn append_value(&mut self, _: ()) {
760 self.item_count += 1;
761 }
762
763 fn append_null(&mut self) {
764 self.item_count += 1;
765 }
766
767 fn finish(self) -> NullArray {
768 NullArray::new(self.item_count)
769 }
770}
771
// Row builder for the Arrow null type, backed by `FakeNullBuilder`.
// The capacity argument is ignored because the backing builder only counts rows.
// A value is appended when `as_null()` yields `Some`.
define_variant_to_primitive_builder!(
    struct VariantToNullArrowRowBuilder<'a>
    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
    |value| value.as_null(),
    type_name: "Null"
);
778
#[cfg(test)]
mod tests {
    use super::make_primitive_variant_to_arrow_row_builder;
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use std::sync::Arc;

    /// Nested, dictionary and run-end-encoded types must be refused with an
    /// `InvalidArgumentError` whose message names the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        let int_item = Arc::new(Field::new("item", DataType::Int32, true));
        let map_entry_type = DataType::Struct(Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]));

        let rejected_types = [
            DataType::List(Arc::clone(&int_item)),
            DataType::LargeList(Arc::clone(&int_item)),
            DataType::ListView(Arc::clone(&int_item)),
            DataType::LargeListView(Arc::clone(&int_item)),
            DataType::FixedSizeList(int_item, 2),
            DataType::Struct(Fields::from(vec![Field::new(
                "child",
                DataType::Int32,
                true,
            )])),
            DataType::Map(Arc::new(Field::new("entries", map_entry_type, true)), false),
            DataType::Union(
                UnionFields::new(vec![1], vec![Field::new("child", DataType::Int32, true)]),
                UnionMode::Dense,
            ),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::Int32, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            ),
        ];

        for data_type in &rejected_types {
            // The builder type has no `Debug` impl visible here, so the result is matched
            // by hand rather than through `expect_err`.
            match make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1) {
                Ok(_) => panic!("non-primitive type {data_type:?} should be rejected"),
                Err(ArrowError::InvalidArgumentError(msg)) => {
                    assert!(msg.contains(&format!("{data_type:?}")));
                }
                Err(other) => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }
}