1use crate::shred_variant::{
19 VariantToShreddedVariantRowBuilder, make_variant_to_shredded_variant_arrow_row_builder,
20};
21use crate::type_conversion::{
22 PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
23};
24use crate::variant_array::ShreddedVariantFieldArray;
25use crate::{VariantArray, VariantValueArrayBuilder};
26use arrow::array::{
27 ArrayRef, ArrowNativeTypeOp, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray,
28 BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, GenericListArray,
29 GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
30 OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
31};
32use arrow::buffer::{OffsetBuffer, ScalarBuffer};
33use arrow::compute::{CastOptions, DecimalCast};
34use arrow::datatypes::{self, DataType, DecimalType};
35use arrow::error::{ArrowError, Result};
36use arrow_schema::{FieldRef, TimeUnit};
37use parquet_variant::{Variant, VariantPath};
38use std::sync::Arc;
39
40pub(crate) enum VariantToArrowRowBuilder<'a> {
45 Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
46 Array(ArrayVariantToArrowRowBuilder<'a>),
47 BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
48
49 WithPath(VariantPathRowBuilder<'a>),
51}
52
53impl<'a> VariantToArrowRowBuilder<'a> {
54 pub fn append_null(&mut self) -> Result<()> {
55 use VariantToArrowRowBuilder::*;
56 match self {
57 Primitive(b) => b.append_null(),
58 Array(b) => b.append_null(),
59 BinaryVariant(b) => b.append_null(),
60 WithPath(path_builder) => path_builder.append_null(),
61 }
62 }
63
64 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
65 use VariantToArrowRowBuilder::*;
66 match self {
67 Primitive(b) => b.append_value(&value),
68 Array(b) => b.append_value(&value),
69 BinaryVariant(b) => b.append_value(value),
70 WithPath(path_builder) => path_builder.append_value(value),
71 }
72 }
73
74 pub fn finish(self) -> Result<ArrayRef> {
75 use VariantToArrowRowBuilder::*;
76 match self {
77 Primitive(b) => b.finish(),
78 Array(b) => b.finish(),
79 BinaryVariant(b) => b.finish(),
80 WithPath(path_builder) => path_builder.finish(),
81 }
82 }
83}
84
85pub(crate) fn make_variant_to_arrow_row_builder<'a>(
86 metadata: &BinaryViewArray,
87 path: VariantPath<'a>,
88 data_type: Option<&'a DataType>,
89 cast_options: &'a CastOptions,
90 capacity: usize,
91) -> Result<VariantToArrowRowBuilder<'a>> {
92 use VariantToArrowRowBuilder::*;
93
94 let mut builder = match data_type {
95 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
97 metadata.clone(),
98 capacity,
99 )),
100 Some(DataType::Struct(_)) => {
101 return Err(ArrowError::NotYetImplemented(
102 "Converting unshredded variant objects to arrow structs".to_string(),
103 ));
104 }
105 Some(
106 data_type @ (DataType::List(_)
107 | DataType::LargeList(_)
108 | DataType::ListView(_)
109 | DataType::LargeListView(_)
110 | DataType::FixedSizeList(..)),
111 ) => {
112 let builder =
113 ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?;
114 Array(builder)
115 }
116 Some(data_type) => {
117 let builder =
118 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
119 Primitive(builder)
120 }
121 };
122
123 if !path.is_empty() {
125 builder = WithPath(VariantPathRowBuilder {
126 builder: Box::new(builder),
127 path,
128 })
129 };
130
131 Ok(builder)
132}
133
134pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
138 Null(VariantToNullArrowRowBuilder<'a>),
139 Boolean(VariantToBooleanArrowRowBuilder<'a>),
140 Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
141 Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
142 Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
143 Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
144 UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
145 UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
146 UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
147 UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
148 Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
149 Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
150 Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
151 Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
152 Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
153 Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
154 Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
155 TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
156 TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
157 TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
158 TimestampMilliNtz(
159 VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
160 ),
161 TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
162 TimestampMicroNtz(
163 VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
164 ),
165 TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
166 TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
167 Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
168 Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
169 Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
170 Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
171 Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
172 Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
173 Uuid(VariantToUuidArrowRowBuilder<'a>),
174 String(VariantToStringArrowBuilder<'a, StringBuilder>),
175 LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
176 StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
177 Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
178 LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
179 BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
180}
181
182impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
183 pub fn append_null(&mut self) -> Result<()> {
184 use PrimitiveVariantToArrowRowBuilder::*;
185 match self {
186 Null(b) => b.append_null(),
187 Boolean(b) => b.append_null(),
188 Int8(b) => b.append_null(),
189 Int16(b) => b.append_null(),
190 Int32(b) => b.append_null(),
191 Int64(b) => b.append_null(),
192 UInt8(b) => b.append_null(),
193 UInt16(b) => b.append_null(),
194 UInt32(b) => b.append_null(),
195 UInt64(b) => b.append_null(),
196 Float16(b) => b.append_null(),
197 Float32(b) => b.append_null(),
198 Float64(b) => b.append_null(),
199 Decimal32(b) => b.append_null(),
200 Decimal64(b) => b.append_null(),
201 Decimal128(b) => b.append_null(),
202 Decimal256(b) => b.append_null(),
203 TimestampSecond(b) => b.append_null(),
204 TimestampSecondNtz(b) => b.append_null(),
205 TimestampMilli(b) => b.append_null(),
206 TimestampMilliNtz(b) => b.append_null(),
207 TimestampMicro(b) => b.append_null(),
208 TimestampMicroNtz(b) => b.append_null(),
209 TimestampNano(b) => b.append_null(),
210 TimestampNanoNtz(b) => b.append_null(),
211 Time32Second(b) => b.append_null(),
212 Time32Milli(b) => b.append_null(),
213 Time64Micro(b) => b.append_null(),
214 Time64Nano(b) => b.append_null(),
215 Date32(b) => b.append_null(),
216 Date64(b) => b.append_null(),
217 Uuid(b) => b.append_null(),
218 String(b) => b.append_null(),
219 LargeString(b) => b.append_null(),
220 StringView(b) => b.append_null(),
221 Binary(b) => b.append_null(),
222 LargeBinary(b) => b.append_null(),
223 BinaryView(b) => b.append_null(),
224 }
225 }
226
227 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
228 use PrimitiveVariantToArrowRowBuilder::*;
229 match self {
230 Null(b) => b.append_value(value),
231 Boolean(b) => b.append_value(value),
232 Int8(b) => b.append_value(value),
233 Int16(b) => b.append_value(value),
234 Int32(b) => b.append_value(value),
235 Int64(b) => b.append_value(value),
236 UInt8(b) => b.append_value(value),
237 UInt16(b) => b.append_value(value),
238 UInt32(b) => b.append_value(value),
239 UInt64(b) => b.append_value(value),
240 Float16(b) => b.append_value(value),
241 Float32(b) => b.append_value(value),
242 Float64(b) => b.append_value(value),
243 Decimal32(b) => b.append_value(value),
244 Decimal64(b) => b.append_value(value),
245 Decimal128(b) => b.append_value(value),
246 Decimal256(b) => b.append_value(value),
247 TimestampSecond(b) => b.append_value(value),
248 TimestampSecondNtz(b) => b.append_value(value),
249 TimestampMilli(b) => b.append_value(value),
250 TimestampMilliNtz(b) => b.append_value(value),
251 TimestampMicro(b) => b.append_value(value),
252 TimestampMicroNtz(b) => b.append_value(value),
253 TimestampNano(b) => b.append_value(value),
254 TimestampNanoNtz(b) => b.append_value(value),
255 Time32Second(b) => b.append_value(value),
256 Time32Milli(b) => b.append_value(value),
257 Time64Micro(b) => b.append_value(value),
258 Time64Nano(b) => b.append_value(value),
259 Date32(b) => b.append_value(value),
260 Date64(b) => b.append_value(value),
261 Uuid(b) => b.append_value(value),
262 String(b) => b.append_value(value),
263 LargeString(b) => b.append_value(value),
264 StringView(b) => b.append_value(value),
265 Binary(b) => b.append_value(value),
266 LargeBinary(b) => b.append_value(value),
267 BinaryView(b) => b.append_value(value),
268 }
269 }
270
271 pub fn finish(self) -> Result<ArrayRef> {
272 use PrimitiveVariantToArrowRowBuilder::*;
273 match self {
274 Null(b) => b.finish(),
275 Boolean(b) => b.finish(),
276 Int8(b) => b.finish(),
277 Int16(b) => b.finish(),
278 Int32(b) => b.finish(),
279 Int64(b) => b.finish(),
280 UInt8(b) => b.finish(),
281 UInt16(b) => b.finish(),
282 UInt32(b) => b.finish(),
283 UInt64(b) => b.finish(),
284 Float16(b) => b.finish(),
285 Float32(b) => b.finish(),
286 Float64(b) => b.finish(),
287 Decimal32(b) => b.finish(),
288 Decimal64(b) => b.finish(),
289 Decimal128(b) => b.finish(),
290 Decimal256(b) => b.finish(),
291 TimestampSecond(b) => b.finish(),
292 TimestampSecondNtz(b) => b.finish(),
293 TimestampMilli(b) => b.finish(),
294 TimestampMilliNtz(b) => b.finish(),
295 TimestampMicro(b) => b.finish(),
296 TimestampMicroNtz(b) => b.finish(),
297 TimestampNano(b) => b.finish(),
298 TimestampNanoNtz(b) => b.finish(),
299 Time32Second(b) => b.finish(),
300 Time32Milli(b) => b.finish(),
301 Time64Micro(b) => b.finish(),
302 Time64Nano(b) => b.finish(),
303 Date32(b) => b.finish(),
304 Date64(b) => b.finish(),
305 Uuid(b) => b.finish(),
306 String(b) => b.finish(),
307 LargeString(b) => b.finish(),
308 StringView(b) => b.finish(),
309 Binary(b) => b.finish(),
310 LargeBinary(b) => b.finish(),
311 BinaryView(b) => b.finish(),
312 }
313 }
314}
315
316pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
318 data_type: &'a DataType,
319 cast_options: &'a CastOptions,
320 capacity: usize,
321) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
322 use PrimitiveVariantToArrowRowBuilder::*;
323
324 let builder =
325 match data_type {
326 DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
327 DataType::Boolean => {
328 Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
329 }
330 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
331 cast_options,
332 capacity,
333 )),
334 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
335 cast_options,
336 capacity,
337 )),
338 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
339 cast_options,
340 capacity,
341 )),
342 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
343 cast_options,
344 capacity,
345 )),
346 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
347 cast_options,
348 capacity,
349 )),
350 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
351 cast_options,
352 capacity,
353 )),
354 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
355 cast_options,
356 capacity,
357 )),
358 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
359 cast_options,
360 capacity,
361 )),
362 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
363 cast_options,
364 capacity,
365 )),
366 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
367 cast_options,
368 capacity,
369 )),
370 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
371 cast_options,
372 capacity,
373 )),
374 DataType::Decimal32(precision, scale) => Decimal32(
375 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
376 ),
377 DataType::Decimal64(precision, scale) => Decimal64(
378 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
379 ),
380 DataType::Decimal128(precision, scale) => Decimal128(
381 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
382 ),
383 DataType::Decimal256(precision, scale) => Decimal256(
384 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
385 ),
386 DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
387 cast_options,
388 capacity,
389 )),
390 DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
391 cast_options,
392 capacity,
393 )),
394 DataType::Time32(TimeUnit::Second) => Time32Second(
395 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
396 ),
397 DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
398 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
399 ),
400 DataType::Time32(t) => {
401 return Err(ArrowError::InvalidArgumentError(format!(
402 "The unit for Time32 must be second/millisecond, received {t:?}"
403 )));
404 }
405 DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
406 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
407 ),
408 DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
409 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
410 ),
411 DataType::Time64(t) => {
412 return Err(ArrowError::InvalidArgumentError(format!(
413 "The unit for Time64 must be micro/nano seconds, received {t:?}"
414 )));
415 }
416 DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
417 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
418 ),
419 DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
420 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
421 ),
422 DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
423 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
424 ),
425 DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
426 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
427 ),
428 DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
429 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
430 ),
431 DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
432 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
433 ),
434 DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
435 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
436 ),
437 DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
438 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
439 ),
440 DataType::Duration(_) | DataType::Interval(_) => {
441 return Err(ArrowError::InvalidArgumentError(
442 "Casting Variant to duration/interval types is not supported. \
443 The Variant format does not define duration/interval types."
444 .to_string(),
445 ));
446 }
447 DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
448 DataType::LargeBinary => {
449 LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
450 }
451 DataType::BinaryView => {
452 BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
453 }
454 DataType::FixedSizeBinary(16) => {
455 Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
456 }
457 DataType::FixedSizeBinary(_) => {
458 return Err(ArrowError::NotYetImplemented(format!(
459 "DataType {data_type:?} not yet implemented"
460 )));
461 }
462 DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
463 DataType::LargeUtf8 => {
464 LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
465 }
466 DataType::Utf8View => {
467 StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
468 }
469 DataType::List(_)
470 | DataType::LargeList(_)
471 | DataType::ListView(_)
472 | DataType::LargeListView(_)
473 | DataType::FixedSizeList(..)
474 | DataType::Struct(_)
475 | DataType::Map(..)
476 | DataType::Union(..)
477 | DataType::Dictionary(..)
478 | DataType::RunEndEncoded(..) => {
479 return Err(ArrowError::InvalidArgumentError(format!(
480 "Casting to {data_type:?} is not applicable for primitive Variant types"
481 )));
482 }
483 };
484 Ok(builder)
485}
486
487pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
488 List(VariantToListArrowRowBuilder<'a, i32, false>),
489 LargeList(VariantToListArrowRowBuilder<'a, i64, false>),
490 ListView(VariantToListArrowRowBuilder<'a, i32, true>),
491 LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
492}
493
494impl<'a> ArrayVariantToArrowRowBuilder<'a> {
495 pub(crate) fn try_new(
496 data_type: &'a DataType,
497 cast_options: &'a CastOptions,
498 capacity: usize,
499 ) -> Result<Self> {
500 use ArrayVariantToArrowRowBuilder::*;
501
502 macro_rules! make_list_builder {
504 ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
505 $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
506 $field.clone(),
507 $field.data_type(),
508 cast_options,
509 capacity,
510 )?)
511 };
512 }
513
514 let builder = match data_type {
515 DataType::List(field) => make_list_builder!(List, i32, false, field),
516 DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
517 DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
518 DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
519 DataType::FixedSizeList(..) => {
520 return Err(ArrowError::NotYetImplemented(
521 "Converting unshredded variant arrays to arrow fixed-size lists".to_string(),
522 ));
523 }
524 other => {
525 return Err(ArrowError::InvalidArgumentError(format!(
526 "Casting to {other:?} is not applicable for array Variant types"
527 )));
528 }
529 };
530 Ok(builder)
531 }
532
533 pub(crate) fn append_null(&mut self) -> Result<()> {
534 match self {
535 Self::List(builder) => builder.append_null(),
536 Self::LargeList(builder) => builder.append_null(),
537 Self::ListView(builder) => builder.append_null(),
538 Self::LargeListView(builder) => builder.append_null(),
539 }
540 }
541
542 pub(crate) fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
543 match self {
544 Self::List(builder) => builder.append_value(value),
545 Self::LargeList(builder) => builder.append_value(value),
546 Self::ListView(builder) => builder.append_value(value),
547 Self::LargeListView(builder) => builder.append_value(value),
548 }
549 }
550
551 pub(crate) fn finish(self) -> Result<ArrayRef> {
552 match self {
553 Self::List(builder) => builder.finish(),
554 Self::LargeList(builder) => builder.finish(),
555 Self::ListView(builder) => builder.finish(),
556 Self::LargeListView(builder) => builder.finish(),
557 }
558 }
559}
560
561pub(crate) struct VariantPathRowBuilder<'a> {
564 builder: Box<VariantToArrowRowBuilder<'a>>,
565 path: VariantPath<'a>,
566}
567
568impl<'a> VariantPathRowBuilder<'a> {
569 fn append_null(&mut self) -> Result<()> {
570 self.builder.append_null()
571 }
572
573 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
574 if let Some(v) = value.get_path(&self.path) {
575 self.builder.append_value(v)
576 } else {
577 self.builder.append_null()?;
578 Ok(false)
579 }
580 }
581
582 fn finish(self) -> Result<ArrayRef> {
583 self.builder.finish()
584 }
585}
586
/// Generates a row builder struct that wraps an array builder and converts every incoming
/// [`Variant`] with a caller-supplied expression.
///
/// Invocation shape:
/// * `struct Name<'a [, G: Bound]>`: name, lifetime and optional generic parameter of the
///   generated struct.
/// * `|capacity [, field: Type]| -> Builder[<T>] { init }`: the parameter(s) of the generated
///   `new` (after `cast_options`), the wrapped builder type, and the expression creating it.
/// * `|value| transform`: expression producing an `Option`; `Some(v)` is appended, `None` is
///   treated as a failed conversion.
/// * `type_name: expr`: formatted with `{}` into the error raised on a failed conversion.
///
/// On a failed conversion the generated `append_value` appends a null and returns `Ok(false)`
/// when `cast_options.safe` is set, and returns `ArrowError::CastError` otherwise.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match $value_transform {
                    Some(converted) => {
                        self.builder.append_value(converted);
                        Ok(true)
                    }
                    // Safe casts replace values that cannot be converted with a null.
                    None if self.cast_options.safe => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    None => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
                        $type_name,
                        $value
                    ))),
                }
            }

            // `mut` is unused when the wrapped builder's `finish` takes `self` by value, as
            // `FakeNullBuilder::finish` does.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                // NOTE(review): `Arc::from` (not `Arc::new`) is kept on purpose. Presumably it
                // lets builders whose `finish` already yields an `ArrayRef` type-check as well
                // as those yielding a concrete array; confirm before changing.
                Ok(Arc::from(self.builder.finish()))
            }
        }
    }
}
647
648define_variant_to_primitive_builder!(
649 struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
650 |capacity| -> B { B::with_capacity(capacity) },
651 |value| value.as_string(),
652 type_name: B::type_name()
653);
654
655define_variant_to_primitive_builder!(
656 struct VariantToBooleanArrowRowBuilder<'a>
657 |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
658 |value| value.as_boolean(),
659 type_name: datatypes::BooleanType::DATA_TYPE
660);
661
662define_variant_to_primitive_builder!(
663 struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
664 |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
665 |value| T::from_variant(value),
666 type_name: T::DATA_TYPE
667);
668
669define_variant_to_primitive_builder!(
670 struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
671 |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
672 |value| T::from_variant(value),
673 type_name: T::DATA_TYPE
674);
675
676define_variant_to_primitive_builder!(
677 struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
678 |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
679 PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
680 },
681 |value| T::from_variant(value),
682 type_name: T::DATA_TYPE
683);
684
685define_variant_to_primitive_builder!(
686 struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
687 |capacity| -> B { B::with_capacity(capacity) },
688 |value| value.as_u8_slice(),
689 type_name: B::type_name()
690);
691
692pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
694where
695 T: DecimalType,
696 T::Native: DecimalCast,
697{
698 builder: PrimitiveBuilder<T>,
699 cast_options: &'a CastOptions<'a>,
700 precision: u8,
701 scale: i8,
702}
703
704impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
705where
706 T: DecimalType,
707 T::Native: DecimalCast,
708{
709 fn new(
710 cast_options: &'a CastOptions<'a>,
711 capacity: usize,
712 precision: u8,
713 scale: i8,
714 ) -> Result<Self> {
715 let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
716 .with_precision_and_scale(precision, scale)?;
717 Ok(Self {
718 builder,
719 cast_options,
720 precision,
721 scale,
722 })
723 }
724
725 fn append_null(&mut self) -> Result<()> {
726 self.builder.append_null();
727 Ok(())
728 }
729
730 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
731 if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
732 self.builder.append_value(scaled);
733 Ok(true)
734 } else if self.cast_options.safe {
735 self.builder.append_null();
736 Ok(false)
737 } else {
738 Err(ArrowError::CastError(format!(
739 "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
740 T::PREFIX,
741 self.precision,
742 self.scale,
743 value
744 )))
745 }
746 }
747
748 fn finish(mut self) -> Result<ArrayRef> {
749 Ok(Arc::new(self.builder.finish()))
750 }
751}
752
753pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
755 builder: FixedSizeBinaryBuilder,
756 cast_options: &'a CastOptions<'a>,
757}
758
759impl<'a> VariantToUuidArrowRowBuilder<'a> {
760 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
761 Self {
762 builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
763 cast_options,
764 }
765 }
766
767 fn append_null(&mut self) -> Result<()> {
768 self.builder.append_null();
769 Ok(())
770 }
771
772 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
773 match value.as_uuid() {
774 Some(uuid) => {
775 self.builder
776 .append_value(uuid.as_bytes())
777 .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
778
779 Ok(true)
780 }
781 None if self.cast_options.safe => {
782 self.builder.append_null();
783 Ok(false)
784 }
785 None => Err(ArrowError::CastError(format!(
786 "Failed to extract UUID from variant {value:?}",
787 ))),
788 }
789 }
790
791 fn finish(mut self) -> Result<ArrayRef> {
792 Ok(Arc::new(self.builder.finish()))
793 }
794}
795
796pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
797where
798 O: OffsetSizeTrait + ArrowNativeTypeOp,
799{
800 field: FieldRef,
801 offsets: Vec<O>,
802 element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
803 nulls: NullBufferBuilder,
804 current_offset: O,
805 cast_options: &'a CastOptions<'a>,
806}
807
808impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
809where
810 O: OffsetSizeTrait + ArrowNativeTypeOp,
811{
812 fn try_new(
813 field: FieldRef,
814 element_data_type: &'a DataType,
815 cast_options: &'a CastOptions,
816 capacity: usize,
817 ) -> Result<Self> {
818 if capacity >= isize::MAX as usize {
819 return Err(ArrowError::ComputeError(
820 "Capacity exceeds isize::MAX when reserving list offsets".to_string(),
821 ));
822 }
823 let mut offsets = Vec::with_capacity(capacity + 1);
824 offsets.push(O::ZERO);
825 let element_builder = make_variant_to_shredded_variant_arrow_row_builder(
826 element_data_type,
827 cast_options,
828 capacity,
829 false,
830 )?;
831 Ok(Self {
832 field,
833 offsets,
834 element_builder: Box::new(element_builder),
835 nulls: NullBufferBuilder::new(capacity),
836 current_offset: O::ZERO,
837 cast_options,
838 })
839 }
840
841 fn append_null(&mut self) -> Result<()> {
842 self.offsets.push(self.current_offset);
843 self.nulls.append_null();
844 Ok(())
845 }
846
847 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
848 match value {
849 Variant::List(list) => {
850 for element in list.iter() {
851 self.element_builder.append_value(element)?;
852 self.current_offset = self.current_offset.add_checked(O::ONE)?;
853 }
854 self.offsets.push(self.current_offset);
855 self.nulls.append_non_null();
856 Ok(true)
857 }
858 _ if self.cast_options.safe => {
859 self.append_null()?;
860 Ok(false)
861 }
862 _ => Err(ArrowError::CastError(format!(
863 "Failed to extract list from variant {:?}",
864 value
865 ))),
866 }
867 }
868
869 fn finish(mut self) -> Result<ArrayRef> {
870 let (value, typed_value, nulls) = self.element_builder.finish()?;
871 let element_array =
872 ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
873 let field = Arc::new(
874 self.field
875 .as_ref()
876 .clone()
877 .with_data_type(element_array.data_type().clone()),
878 );
879
880 if IS_VIEW {
881 let mut sizes = Vec::with_capacity(self.offsets.len() - 1);
883 for i in 1..self.offsets.len() {
884 sizes.push(self.offsets[i] - self.offsets[i - 1]);
885 }
886 self.offsets.pop();
887 let list_view_array = GenericListViewArray::<O>::new(
888 field,
889 ScalarBuffer::from(self.offsets),
890 ScalarBuffer::from(sizes),
891 ArrayRef::from(element_array),
892 self.nulls.finish(),
893 );
894 Ok(Arc::new(list_view_array))
895 } else {
896 let list_array = GenericListArray::<O>::new(
897 field,
898 OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
899 ArrayRef::from(element_array),
900 self.nulls.finish(),
901 );
902 Ok(Arc::new(list_array))
903 }
904 }
905}
906
907pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
909 metadata: BinaryViewArray,
910 builder: VariantValueArrayBuilder,
911 nulls: NullBufferBuilder,
912}
913
914impl VariantToBinaryVariantArrowRowBuilder {
915 fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
916 Self {
917 metadata,
918 builder: VariantValueArrayBuilder::new(capacity),
919 nulls: NullBufferBuilder::new(capacity),
920 }
921 }
922}
923
924impl VariantToBinaryVariantArrowRowBuilder {
925 fn append_null(&mut self) -> Result<()> {
926 self.builder.append_null();
927 self.nulls.append_null();
928 Ok(())
929 }
930
931 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
932 self.builder.append_value(value);
933 self.nulls.append_non_null();
934 Ok(true)
935 }
936
937 fn finish(mut self) -> Result<ArrayRef> {
938 let variant_array = VariantArray::from_parts(
939 self.metadata,
940 Some(self.builder.build()?),
941 None, self.nulls.finish(),
943 );
944
945 Ok(ArrayRef::from(variant_array))
946 }
947}
948
/// Minimal stand-in for an array builder that produces a `NullArray`: it only counts rows.
#[derive(Default)]
struct FakeNullBuilder {
    /// Number of rows appended so far, values and nulls alike.
    item_count: usize,
}
953
954impl FakeNullBuilder {
955 fn append_value(&mut self, _: ()) {
956 self.item_count += 1;
957 }
958
959 fn append_null(&mut self) {
960 self.item_count += 1;
961 }
962
963 fn finish(self) -> NullArray {
964 NullArray::new(self.item_count)
965 }
966}
967
968define_variant_to_primitive_builder!(
969 struct VariantToNullArrowRowBuilder<'a>
970 |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
971 |value| value.as_null(),
972 type_name: "Null"
973);
974
#[cfg(test)]
mod tests {
    use super::make_primitive_variant_to_arrow_row_builder;
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use std::sync::Arc;

    /// Nested and encoded Arrow types must be refused with an `InvalidArgumentError` whose
    /// message names the offending type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        // Child fields shared by the nested types below.
        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
        let struct_fields = Fields::from(vec![Field::new("child", DataType::Int32, true)]);
        let map_entry_fields = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]);
        let map_entries_field = Arc::new(Field::new(
            "entries",
            DataType::Struct(map_entry_fields),
            true,
        ));
        let union_fields =
            UnionFields::try_new(vec![1], vec![Field::new("child", DataType::Int32, true)])
                .unwrap();
        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false));
        let ree_values_field = Arc::new(Field::new("values", DataType::Utf8, true));

        let non_primitive_types = vec![
            DataType::List(item_field.clone()),
            DataType::LargeList(item_field.clone()),
            DataType::ListView(item_field.clone()),
            DataType::LargeListView(item_field.clone()),
            DataType::FixedSizeList(item_field.clone(), 2),
            DataType::Struct(struct_fields.clone()),
            DataType::Map(map_entries_field.clone(), false),
            DataType::Union(union_fields.clone(), UnionMode::Dense),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(run_ends_field.clone(), ree_values_field.clone()),
        ];

        for data_type in non_primitive_types {
            // The builder type has no `Debug` impl, so `expect_err` is not available here.
            let err = make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 1)
                .err()
                .unwrap_or_else(|| panic!("non-primitive type {data_type:?} should be rejected"));

            let rendered_type = format!("{data_type:?}");
            match err {
                ArrowError::InvalidArgumentError(msg) => {
                    assert!(msg.contains(&rendered_type));
                }
                other => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }
}