1use crate::shred_variant::{
19 NullValue, VariantToShreddedVariantRowBuilder,
20 make_variant_to_shredded_variant_arrow_row_builder,
21};
22use crate::type_conversion::{
23 PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options,
24 variant_to_unscaled_decimal,
25};
26use crate::variant_array::ShreddedVariantFieldArray;
27use crate::{VariantArray, VariantValueArrayBuilder};
28use arrow::array::{
29 ArrayRef, ArrowNativeTypeOp, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray,
30 BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, GenericListArray,
31 GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
32 OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
33 StructArray,
34};
35use arrow::buffer::{OffsetBuffer, ScalarBuffer};
36use arrow::compute::{CastOptions, DecimalCast};
37use arrow::datatypes::{self, DataType, DecimalType};
38use arrow::error::{ArrowError, Result};
39use arrow_schema::{FieldRef, Fields, TimeUnit};
40use parquet_variant::{Variant, VariantPath};
41use std::sync::Arc;
42
/// Row-at-a-time builder that turns `Variant` values into an Arrow array.
///
/// Every variant wraps a more specific builder; `append_null`, `append_value` and `finish`
/// forward to whichever builder is held.
pub(crate) enum VariantToArrowRowBuilder<'a> {
    /// Wraps a [`PrimitiveVariantToArrowRowBuilder`].
    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
    /// Wraps an [`ArrayVariantToArrowRowBuilder`] (list-like target types).
    Array(ArrayVariantToArrowRowBuilder<'a>),
    /// Wraps a [`StructVariantToArrowRowBuilder`].
    Struct(StructVariantToArrowRowBuilder<'a>),
    /// Used by `make_variant_to_arrow_row_builder` when no target data type is given.
    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

    /// Wraps another builder together with a non-empty path that is looked up in each value
    /// before the value is forwarded.
    WithPath(VariantPathRowBuilder<'a>),
}
56
57impl<'a> VariantToArrowRowBuilder<'a> {
58 pub fn append_null(&mut self) -> Result<()> {
59 use VariantToArrowRowBuilder::*;
60 match self {
61 Primitive(b) => b.append_null(),
62 Array(b) => b.append_null(),
63 Struct(b) => b.append_null(),
64 BinaryVariant(b) => b.append_null(),
65 WithPath(path_builder) => path_builder.append_null(),
66 }
67 }
68
69 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
70 use VariantToArrowRowBuilder::*;
71 match self {
72 Primitive(b) => b.append_value(&value),
73 Array(b) => b.append_value(&value),
74 Struct(b) => b.append_value(&value),
75 BinaryVariant(b) => b.append_value(value),
76 WithPath(path_builder) => path_builder.append_value(value),
77 }
78 }
79
80 pub fn finish(self) -> Result<ArrayRef> {
81 use VariantToArrowRowBuilder::*;
82 match self {
83 Primitive(b) => b.finish(),
84 Array(b) => b.finish(),
85 Struct(b) => b.finish(),
86 BinaryVariant(b) => b.finish(),
87 WithPath(path_builder) => path_builder.finish(),
88 }
89 }
90}
91
92fn make_typed_variant_to_arrow_row_builder<'a>(
93 data_type: &'a DataType,
94 cast_options: &'a CastOptions,
95 capacity: usize,
96) -> Result<VariantToArrowRowBuilder<'a>> {
97 use VariantToArrowRowBuilder::*;
98
99 match data_type {
100 DataType::Struct(fields) => {
101 let builder = StructVariantToArrowRowBuilder::try_new(fields, cast_options, capacity)?;
102 Ok(Struct(builder))
103 }
104 data_type @ (DataType::List(_)
105 | DataType::LargeList(_)
106 | DataType::ListView(_)
107 | DataType::LargeListView(_)
108 | DataType::FixedSizeList(..)) => {
109 let builder =
110 ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity, false)?;
111 Ok(Array(builder))
112 }
113 data_type => {
114 let builder =
115 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
116 Ok(Primitive(builder))
117 }
118 }
119}
120
121pub(crate) fn make_variant_to_arrow_row_builder<'a>(
122 metadata: &BinaryViewArray,
123 path: VariantPath<'a>,
124 data_type: Option<&'a DataType>,
125 cast_options: &'a CastOptions,
126 capacity: usize,
127) -> Result<VariantToArrowRowBuilder<'a>> {
128 use VariantToArrowRowBuilder::*;
129
130 let mut builder = match data_type {
131 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
133 metadata.clone(),
134 capacity,
135 )),
136 Some(data_type) => {
137 make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity)?
138 }
139 };
140
141 if !path.is_empty() {
143 builder = WithPath(VariantPathRowBuilder {
144 builder: Box::new(builder),
145 path,
146 })
147 };
148
149 Ok(builder)
150}
151
/// Row builder for non-nested Arrow target types.
///
/// There is one variant per concrete builder that
/// `make_primitive_variant_to_arrow_row_builder` can create; all methods on this enum forward
/// to the wrapped builder.
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
    // Null and boolean targets.
    Null(VariantToNullArrowRowBuilder<'a>),
    Boolean(VariantToBooleanArrowRowBuilder<'a>),
    // Integer and floating point targets.
    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
    // Decimal targets; precision and scale are stored in the wrapped builder.
    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
    // Timestamp targets. The `*Ntz` variants are selected when the Arrow timestamp type
    // carries no timezone (`None`); the others receive the timezone of the target type.
    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMilliNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
    ),
    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampMicroNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
    ),
    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    // Time-of-day and date targets.
    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
    // Selected for `DataType::FixedSizeBinary(16)`.
    Uuid(VariantToUuidArrowRowBuilder<'a>),
    // String targets (`Utf8`, `LargeUtf8`, `Utf8View`).
    String(VariantToStringArrowBuilder<'a, StringBuilder>),
    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
    // Binary targets (`Binary`, `LargeBinary`, `BinaryView`).
    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
}
199
200impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
201 pub fn append_null(&mut self) -> Result<()> {
202 use PrimitiveVariantToArrowRowBuilder::*;
203 match self {
204 Null(b) => b.append_null(),
205 Boolean(b) => b.append_null(),
206 Int8(b) => b.append_null(),
207 Int16(b) => b.append_null(),
208 Int32(b) => b.append_null(),
209 Int64(b) => b.append_null(),
210 UInt8(b) => b.append_null(),
211 UInt16(b) => b.append_null(),
212 UInt32(b) => b.append_null(),
213 UInt64(b) => b.append_null(),
214 Float16(b) => b.append_null(),
215 Float32(b) => b.append_null(),
216 Float64(b) => b.append_null(),
217 Decimal32(b) => b.append_null(),
218 Decimal64(b) => b.append_null(),
219 Decimal128(b) => b.append_null(),
220 Decimal256(b) => b.append_null(),
221 TimestampSecond(b) => b.append_null(),
222 TimestampSecondNtz(b) => b.append_null(),
223 TimestampMilli(b) => b.append_null(),
224 TimestampMilliNtz(b) => b.append_null(),
225 TimestampMicro(b) => b.append_null(),
226 TimestampMicroNtz(b) => b.append_null(),
227 TimestampNano(b) => b.append_null(),
228 TimestampNanoNtz(b) => b.append_null(),
229 Time32Second(b) => b.append_null(),
230 Time32Milli(b) => b.append_null(),
231 Time64Micro(b) => b.append_null(),
232 Time64Nano(b) => b.append_null(),
233 Date32(b) => b.append_null(),
234 Date64(b) => b.append_null(),
235 Uuid(b) => b.append_null(),
236 String(b) => b.append_null(),
237 LargeString(b) => b.append_null(),
238 StringView(b) => b.append_null(),
239 Binary(b) => b.append_null(),
240 LargeBinary(b) => b.append_null(),
241 BinaryView(b) => b.append_null(),
242 }
243 }
244
245 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
246 use PrimitiveVariantToArrowRowBuilder::*;
247 match self {
248 Null(b) => b.append_value(value),
249 Boolean(b) => b.append_value(value),
250 Int8(b) => b.append_value(value),
251 Int16(b) => b.append_value(value),
252 Int32(b) => b.append_value(value),
253 Int64(b) => b.append_value(value),
254 UInt8(b) => b.append_value(value),
255 UInt16(b) => b.append_value(value),
256 UInt32(b) => b.append_value(value),
257 UInt64(b) => b.append_value(value),
258 Float16(b) => b.append_value(value),
259 Float32(b) => b.append_value(value),
260 Float64(b) => b.append_value(value),
261 Decimal32(b) => b.append_value(value),
262 Decimal64(b) => b.append_value(value),
263 Decimal128(b) => b.append_value(value),
264 Decimal256(b) => b.append_value(value),
265 TimestampSecond(b) => b.append_value(value),
266 TimestampSecondNtz(b) => b.append_value(value),
267 TimestampMilli(b) => b.append_value(value),
268 TimestampMilliNtz(b) => b.append_value(value),
269 TimestampMicro(b) => b.append_value(value),
270 TimestampMicroNtz(b) => b.append_value(value),
271 TimestampNano(b) => b.append_value(value),
272 TimestampNanoNtz(b) => b.append_value(value),
273 Time32Second(b) => b.append_value(value),
274 Time32Milli(b) => b.append_value(value),
275 Time64Micro(b) => b.append_value(value),
276 Time64Nano(b) => b.append_value(value),
277 Date32(b) => b.append_value(value),
278 Date64(b) => b.append_value(value),
279 Uuid(b) => b.append_value(value),
280 String(b) => b.append_value(value),
281 LargeString(b) => b.append_value(value),
282 StringView(b) => b.append_value(value),
283 Binary(b) => b.append_value(value),
284 LargeBinary(b) => b.append_value(value),
285 BinaryView(b) => b.append_value(value),
286 }
287 }
288
289 pub fn finish(self) -> Result<ArrayRef> {
290 use PrimitiveVariantToArrowRowBuilder::*;
291 match self {
292 Null(b) => b.finish(),
293 Boolean(b) => b.finish(),
294 Int8(b) => b.finish(),
295 Int16(b) => b.finish(),
296 Int32(b) => b.finish(),
297 Int64(b) => b.finish(),
298 UInt8(b) => b.finish(),
299 UInt16(b) => b.finish(),
300 UInt32(b) => b.finish(),
301 UInt64(b) => b.finish(),
302 Float16(b) => b.finish(),
303 Float32(b) => b.finish(),
304 Float64(b) => b.finish(),
305 Decimal32(b) => b.finish(),
306 Decimal64(b) => b.finish(),
307 Decimal128(b) => b.finish(),
308 Decimal256(b) => b.finish(),
309 TimestampSecond(b) => b.finish(),
310 TimestampSecondNtz(b) => b.finish(),
311 TimestampMilli(b) => b.finish(),
312 TimestampMilliNtz(b) => b.finish(),
313 TimestampMicro(b) => b.finish(),
314 TimestampMicroNtz(b) => b.finish(),
315 TimestampNano(b) => b.finish(),
316 TimestampNanoNtz(b) => b.finish(),
317 Time32Second(b) => b.finish(),
318 Time32Milli(b) => b.finish(),
319 Time64Micro(b) => b.finish(),
320 Time64Nano(b) => b.finish(),
321 Date32(b) => b.finish(),
322 Date64(b) => b.finish(),
323 Uuid(b) => b.finish(),
324 String(b) => b.finish(),
325 LargeString(b) => b.finish(),
326 StringView(b) => b.finish(),
327 Binary(b) => b.finish(),
328 LargeBinary(b) => b.finish(),
329 BinaryView(b) => b.finish(),
330 }
331 }
332}
333
334pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
336 data_type: &'a DataType,
337 cast_options: &'a CastOptions,
338 capacity: usize,
339) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
340 use PrimitiveVariantToArrowRowBuilder::*;
341
342 let builder =
343 match data_type {
344 DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
345 DataType::Boolean => {
346 Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
347 }
348 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
349 cast_options,
350 capacity,
351 )),
352 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
353 cast_options,
354 capacity,
355 )),
356 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
357 cast_options,
358 capacity,
359 )),
360 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
361 cast_options,
362 capacity,
363 )),
364 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
365 cast_options,
366 capacity,
367 )),
368 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
369 cast_options,
370 capacity,
371 )),
372 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
373 cast_options,
374 capacity,
375 )),
376 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
377 cast_options,
378 capacity,
379 )),
380 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
381 cast_options,
382 capacity,
383 )),
384 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
385 cast_options,
386 capacity,
387 )),
388 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
389 cast_options,
390 capacity,
391 )),
392 DataType::Decimal32(precision, scale) => Decimal32(
393 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
394 ),
395 DataType::Decimal64(precision, scale) => Decimal64(
396 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
397 ),
398 DataType::Decimal128(precision, scale) => Decimal128(
399 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
400 ),
401 DataType::Decimal256(precision, scale) => Decimal256(
402 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
403 ),
404 DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
405 cast_options,
406 capacity,
407 )),
408 DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
409 cast_options,
410 capacity,
411 )),
412 DataType::Time32(TimeUnit::Second) => Time32Second(
413 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
414 ),
415 DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
416 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
417 ),
418 DataType::Time32(t) => {
419 return Err(ArrowError::InvalidArgumentError(format!(
420 "The unit for Time32 must be second/millisecond, received {t:?}"
421 )));
422 }
423 DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
424 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
425 ),
426 DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
427 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
428 ),
429 DataType::Time64(t) => {
430 return Err(ArrowError::InvalidArgumentError(format!(
431 "The unit for Time64 must be micro/nano seconds, received {t:?}"
432 )));
433 }
434 DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
435 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
436 ),
437 DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
438 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
439 ),
440 DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
441 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
442 ),
443 DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
444 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
445 ),
446 DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
447 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
448 ),
449 DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
450 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
451 ),
452 DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
453 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
454 ),
455 DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
456 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
457 ),
458 DataType::Duration(_) | DataType::Interval(_) => {
459 return Err(ArrowError::InvalidArgumentError(
460 "Casting Variant to duration/interval types is not supported. \
461 The Variant format does not define duration/interval types."
462 .to_string(),
463 ));
464 }
465 DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
466 DataType::LargeBinary => {
467 LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
468 }
469 DataType::BinaryView => {
470 BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
471 }
472 DataType::FixedSizeBinary(16) => {
473 Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
474 }
475 DataType::FixedSizeBinary(_) => {
476 return Err(ArrowError::NotYetImplemented(format!(
477 "DataType {data_type:?} not yet implemented"
478 )));
479 }
480 DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
481 DataType::LargeUtf8 => {
482 LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
483 }
484 DataType::Utf8View => {
485 StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
486 }
487 DataType::List(_)
488 | DataType::LargeList(_)
489 | DataType::ListView(_)
490 | DataType::LargeListView(_)
491 | DataType::FixedSizeList(..)
492 | DataType::Struct(_)
493 | DataType::Map(..)
494 | DataType::Union(..)
495 | DataType::Dictionary(..)
496 | DataType::RunEndEncoded(..) => {
497 return Err(ArrowError::InvalidArgumentError(format!(
498 "Casting to {data_type:?} is not applicable for primitive Variant types"
499 )));
500 }
501 };
502 Ok(builder)
503}
504
/// Row builder for list-like Arrow target types.
///
/// The two generic arguments of the wrapped builder are the offset type (`i32` or `i64`)
/// and whether a list-view array (`true`) or a plain list array (`false`) is produced.
pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
    /// Target `DataType::List`.
    List(VariantToListArrowRowBuilder<'a, i32, false>),
    /// Target `DataType::LargeList`.
    LargeList(VariantToListArrowRowBuilder<'a, i64, false>),
    /// Target `DataType::ListView`.
    ListView(VariantToListArrowRowBuilder<'a, i32, true>),
    /// Target `DataType::LargeListView`.
    LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
}
511
/// Row builder that converts variant objects into an Arrow `StructArray`.
pub(crate) struct StructVariantToArrowRowBuilder<'a> {
    /// Fields of the target struct type.
    fields: &'a Fields,
    /// One child builder per entry in `fields`, in the same order.
    field_builders: Vec<VariantToArrowRowBuilder<'a>>,
    /// Validity of the struct rows themselves.
    nulls: NullBufferBuilder,
    /// Options passed to `variant_cast_with_options` when extracting the object.
    cast_options: &'a CastOptions<'a>,
}
518
519impl<'a> StructVariantToArrowRowBuilder<'a> {
520 fn try_new(
521 fields: &'a Fields,
522 cast_options: &'a CastOptions<'a>,
523 capacity: usize,
524 ) -> Result<Self> {
525 let mut field_builders = Vec::with_capacity(fields.len());
526 for field in fields.iter() {
527 field_builders.push(make_typed_variant_to_arrow_row_builder(
528 field.data_type(),
529 cast_options,
530 capacity,
531 )?);
532 }
533 Ok(Self {
534 fields,
535 field_builders,
536 nulls: NullBufferBuilder::new(capacity),
537 cast_options,
538 })
539 }
540
541 fn append_null(&mut self) -> Result<()> {
542 for builder in &mut self.field_builders {
543 builder.append_null()?;
544 }
545 self.nulls.append_null();
546 Ok(())
547 }
548
549 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
550 match variant_cast_with_options(value, self.cast_options, Variant::as_object) {
551 Ok(Some(obj)) => {
552 for (index, field) in self.fields.iter().enumerate() {
553 match obj.get(field.name()) {
554 Some(field_value) => {
555 self.field_builders[index].append_value(field_value)?;
556 }
557 None => {
558 self.field_builders[index].append_null()?;
559 }
560 }
561 }
562
563 self.nulls.append_non_null();
564 Ok(true)
565 }
566 Ok(None) => {
567 self.append_null()?;
568 Ok(false)
569 }
570 Err(_) => Err(ArrowError::CastError(format!(
571 "Failed to extract struct from variant {value:?}"
572 ))),
573 }
574 }
575
576 fn finish(mut self) -> Result<ArrayRef> {
577 let mut children = Vec::with_capacity(self.field_builders.len());
578 for builder in self.field_builders {
579 children.push(builder.finish()?);
580 }
581 Ok(Arc::new(StructArray::try_new(
582 self.fields.clone(),
583 children,
584 self.nulls.finish(),
585 )?))
586 }
587}
588
589impl<'a> ArrayVariantToArrowRowBuilder<'a> {
590 pub(crate) fn try_new(
597 data_type: &'a DataType,
598 cast_options: &'a CastOptions,
599 capacity: usize,
600 shredded: bool,
601 ) -> Result<Self> {
602 use ArrayVariantToArrowRowBuilder::*;
603
604 macro_rules! make_list_builder {
606 ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
607 $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
608 $field.clone(),
609 $field.data_type(),
610 cast_options,
611 capacity,
612 shredded,
613 )?)
614 };
615 }
616
617 let builder = match data_type {
618 DataType::List(field) => make_list_builder!(List, i32, false, field),
619 DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
620 DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
621 DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
622 DataType::FixedSizeList(..) => {
623 return Err(ArrowError::NotYetImplemented(
624 "Converting unshredded variant arrays to arrow fixed-size lists".to_string(),
625 ));
626 }
627 other => {
628 return Err(ArrowError::InvalidArgumentError(format!(
629 "Casting to {other:?} is not applicable for array Variant types"
630 )));
631 }
632 };
633 Ok(builder)
634 }
635
636 pub(crate) fn append_null(&mut self) -> Result<()> {
637 match self {
638 Self::List(builder) => builder.append_null(),
639 Self::LargeList(builder) => builder.append_null(),
640 Self::ListView(builder) => builder.append_null(),
641 Self::LargeListView(builder) => builder.append_null(),
642 }
643 }
644
645 pub(crate) fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
646 match self {
647 Self::List(builder) => builder.append_value(value),
648 Self::LargeList(builder) => builder.append_value(value),
649 Self::ListView(builder) => builder.append_value(value),
650 Self::LargeListView(builder) => builder.append_value(value),
651 }
652 }
653
654 pub(crate) fn finish(self) -> Result<ArrayRef> {
655 match self {
656 Self::List(builder) => builder.finish(),
657 Self::LargeList(builder) => builder.finish(),
658 Self::ListView(builder) => builder.finish(),
659 Self::LargeListView(builder) => builder.finish(),
660 }
661 }
662}
663
/// Row builder that looks up a path inside each value before delegating to another builder.
pub(crate) struct VariantPathRowBuilder<'a> {
    /// Builder that receives the value found at `path` (or a null when nothing is found).
    builder: Box<VariantToArrowRowBuilder<'a>>,
    /// Path passed to `Variant::get_path` for every appended value.
    path: VariantPath<'a>,
}
670
671impl<'a> VariantPathRowBuilder<'a> {
672 fn append_null(&mut self) -> Result<()> {
673 self.builder.append_null()
674 }
675
676 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
677 if let Some(v) = value.get_path(&self.path) {
678 self.builder.append_value(v)
679 } else {
680 self.builder.append_null()?;
681 Ok(false)
682 }
683 }
684
685 fn finish(self) -> Result<ArrayRef> {
686 self.builder.finish()
687 }
688}
689
// Generates a row-builder struct named `$name` together with `new`, `append_null`,
// `append_value` and `finish`.
//
// Macro input, in order:
// * `struct $name<'lifetime [, Generic: Bound]>` - the struct to define, with an optional
//   single bounded generic parameter.
// * `|capacity [, extra: Type]| -> Builder[<T>] { init }` - the parameter names of `new`
//   (capacity plus one optional extra argument), the type of the wrapped Arrow builder, and
//   the expression that creates it.
// * `|value| transform` - expression applied to the variant (through
//   `variant_cast_with_options`) to extract the value to append.
// * `type_name: expr` - expression whose value is formatted with `{}` into the cast error
//   message.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            // `$array_param` and the optional `$field` are declared as parameters so that
            // `$init_expr` can refer to them by name.
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            // Appends a null row.
            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            // Appends the value extracted by `$value_transform` and returns `Ok(true)`.
            // When `variant_cast_with_options` yields `Ok(None)` a null is appended and
            // `Ok(false)` is returned; when it yields an error a `CastError` is returned.
            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match variant_cast_with_options(
                    $value,
                    self.cast_options,
                    |$value| $value_transform,
                ) {
                    Ok(Some(v)) => {
                        self.builder.append_value(v);
                        Ok(true)
                    }
                    Ok(None) => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    Err(_) => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {type_name} from variant {value:?} at path VariantPath([])",
                        type_name = $type_name,
                        value = $value
                    ))),
                }
            }

            // `mut` is not needed for every wrapped builder type, hence the `allow`.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                // `Arc::from` converts whatever the wrapped builder's `finish` returns into
                // the `ArrayRef` result.
                Ok(Arc::from(self.builder.finish()))
            }
        }
    }
}
753
// String row builder, generic over the Arrow string builder `B`; values are extracted with
// `Variant::as_string`.
define_variant_to_primitive_builder!(
    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_string(),
    type_name: B::type_name()
);
760
// Boolean row builder backed by `BooleanBuilder`; values are extracted with
// `Variant::as_boolean`.
define_variant_to_primitive_builder!(
    struct VariantToBooleanArrowRowBuilder<'a>
    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
    |value| value.as_boolean(),
    type_name: datatypes::BooleanType::DATA_TYPE
);
767
// Row builder for any Arrow primitive type `T` implementing `PrimitiveFromVariant`; values
// are extracted with `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
774
// Timestamp row builder used for Arrow timestamp types without a timezone
// (`T: TimestampFromVariant<true>`); values are extracted with `T::from_variant`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
781
// Timestamp row builder whose constructor takes an extra `tz` argument that is applied to
// the underlying builder via `with_timezone_opt` (`T: TimestampFromVariant<false>`).
define_variant_to_primitive_builder!(
    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
    },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
790
// Binary row builder, generic over the Arrow binary builder `B`; values are extracted with
// `Variant::as_u8_slice`.
define_variant_to_primitive_builder!(
    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_u8_slice(),
    type_name: B::type_name()
);
797
/// Row builder that converts variants into an Arrow decimal array of type `T`.
pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
where
    T: DecimalType,
    T::Native: DecimalCast,
{
    /// Underlying Arrow builder, configured with `precision` and `scale` on construction.
    builder: PrimitiveBuilder<T>,
    /// Options passed to `variant_cast_with_options`.
    cast_options: &'a CastOptions<'a>,
    /// Target precision, passed to `variant_to_unscaled_decimal` for every value.
    precision: u8,
    /// Target scale, passed to `variant_to_unscaled_decimal` for every value.
    scale: i8,
}
809
810impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
811where
812 T: DecimalType,
813 T::Native: DecimalCast,
814{
815 fn new(
816 cast_options: &'a CastOptions<'a>,
817 capacity: usize,
818 precision: u8,
819 scale: i8,
820 ) -> Result<Self> {
821 let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
822 .with_precision_and_scale(precision, scale)?;
823 Ok(Self {
824 builder,
825 cast_options,
826 precision,
827 scale,
828 })
829 }
830
831 fn append_null(&mut self) -> Result<()> {
832 self.builder.append_null();
833 Ok(())
834 }
835
836 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
837 match variant_cast_with_options(value, self.cast_options, |value| {
838 variant_to_unscaled_decimal::<T>(value, self.precision, self.scale)
839 }) {
840 Ok(Some(scaled)) => {
841 self.builder.append_value(scaled);
842 Ok(true)
843 }
844 Ok(None) => {
845 self.builder.append_null();
846 Ok(false)
847 }
848 Err(_) => Err(ArrowError::CastError(format!(
849 "Failed to cast to {prefix}(precision={precision}, scale={scale}) from variant {value:?}",
850 prefix = T::PREFIX,
851 precision = self.precision,
852 scale = self.scale
853 ))),
854 }
855 }
856
857 fn finish(mut self) -> Result<ArrayRef> {
858 Ok(Arc::new(self.builder.finish()))
859 }
860}
861
/// Row builder that stores UUID variants in a fixed-size binary array.
pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
    /// Underlying builder; created with a value width of 16 bytes.
    builder: FixedSizeBinaryBuilder,
    /// Options passed to `variant_cast_with_options`.
    cast_options: &'a CastOptions<'a>,
}
867
868impl<'a> VariantToUuidArrowRowBuilder<'a> {
869 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
870 Self {
871 builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
872 cast_options,
873 }
874 }
875
876 fn append_null(&mut self) -> Result<()> {
877 self.builder.append_null();
878 Ok(())
879 }
880
881 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
882 match variant_cast_with_options(value, self.cast_options, Variant::as_uuid) {
883 Ok(Some(uuid)) => {
884 self.builder
885 .append_value(uuid.as_bytes())
886 .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
887 Ok(true)
888 }
889 Ok(None) => {
890 self.builder.append_null();
891 Ok(false)
892 }
893 Err(_) => Err(ArrowError::CastError(format!(
894 "Failed to extract UUID from variant {value:?}"
895 ))),
896 }
897 }
898
899 fn finish(mut self) -> Result<ArrayRef> {
900 Ok(Arc::new(self.builder.finish()))
901 }
902}
903
/// Builder for the elements of a list; which variant is used is decided by the `shredded`
/// flag given to `VariantToListArrowRowBuilder::try_new`.
enum ListElementBuilder<'a> {
    /// Elements are converted to the target Arrow type (`shredded == false`).
    Typed(Box<VariantToArrowRowBuilder<'a>>),
    /// Elements are built by a shredded-variant row builder (`shredded == true`).
    Shredded(Box<VariantToShreddedVariantRowBuilder<'a>>),
}
912
913impl<'a> ListElementBuilder<'a> {
914 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
915 match self {
916 Self::Typed(b) => b.append_value(value),
917 Self::Shredded(b) => b.append_value(value),
918 }
919 }
920
921 fn finish(self) -> Result<ArrayRef> {
922 match self {
923 Self::Typed(b) => b.finish(),
924 Self::Shredded(b) => {
925 let (value, typed_value, nulls) = b.finish()?;
926 Ok(ArrayRef::from(ShreddedVariantFieldArray::from_parts(
927 Some(value),
928 Some(typed_value),
929 nulls,
930 )))
931 }
932 }
933 }
934}
935
/// Row builder that converts variant lists into an Arrow list array (`IS_VIEW == false`) or
/// list-view array (`IS_VIEW == true`) with offset type `O`.
pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
where
    O: OffsetSizeTrait + ArrowNativeTypeOp,
{
    /// Field of the target list type; its data type is replaced with the element array's
    /// data type in `finish`.
    field: FieldRef,
    /// Starts with a single zero; one offset is pushed per appended row.
    offsets: Vec<O>,
    /// Builder receiving the elements of all rows.
    element_builder: ListElementBuilder<'a>,
    /// Validity of the list rows.
    nulls: NullBufferBuilder,
    /// Number of elements appended so far.
    current_offset: O,
    /// Options passed to `variant_cast_with_options` when extracting the list.
    cast_options: &'a CastOptions<'a>,
}
947
948impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
949where
950 O: OffsetSizeTrait + ArrowNativeTypeOp,
951{
952 fn try_new(
953 field: FieldRef,
954 element_data_type: &'a DataType,
955 cast_options: &'a CastOptions,
956 capacity: usize,
957 shredded: bool,
958 ) -> Result<Self> {
959 if capacity >= isize::MAX as usize {
960 return Err(ArrowError::ComputeError(
961 "Capacity exceeds isize::MAX when reserving list offsets".to_string(),
962 ));
963 }
964 let mut offsets = Vec::with_capacity(capacity + 1);
965 offsets.push(O::ZERO);
966 let element_builder = if shredded {
967 let builder = make_variant_to_shredded_variant_arrow_row_builder(
968 element_data_type,
969 cast_options,
970 capacity,
971 NullValue::ArrayElement,
972 )?;
973 ListElementBuilder::Shredded(Box::new(builder))
974 } else {
975 let builder =
976 make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?;
977 ListElementBuilder::Typed(Box::new(builder))
978 };
979
980 Ok(Self {
981 field,
982 offsets,
983 element_builder,
984 nulls: NullBufferBuilder::new(capacity),
985 current_offset: O::ZERO,
986 cast_options,
987 })
988 }
989
990 fn append_null(&mut self) -> Result<()> {
991 self.offsets.push(self.current_offset);
992 self.nulls.append_null();
993 Ok(())
994 }
995
996 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
997 match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
998 Ok(Some(list)) => {
999 for element in list.iter() {
1000 self.element_builder.append_value(element)?;
1001 self.current_offset = self.current_offset.add_checked(O::ONE)?;
1002 }
1003 self.offsets.push(self.current_offset);
1004 self.nulls.append_non_null();
1005 Ok(true)
1006 }
1007 Ok(None) => {
1008 self.append_null()?;
1009 Ok(false)
1010 }
1011 Err(_) => Err(ArrowError::CastError(format!(
1012 "Failed to extract list from variant {value:?}"
1013 ))),
1014 }
1015 }
1016
1017 fn finish(mut self) -> Result<ArrayRef> {
1018 let element_array: ArrayRef = self.element_builder.finish()?;
1019 let field = Arc::new(
1020 self.field
1021 .as_ref()
1022 .clone()
1023 .with_data_type(element_array.data_type().clone()),
1024 );
1025
1026 if IS_VIEW {
1027 let mut sizes = Vec::with_capacity(self.offsets.len() - 1);
1029 for i in 1..self.offsets.len() {
1030 sizes.push(self.offsets[i] - self.offsets[i - 1]);
1031 }
1032 self.offsets.pop();
1033 let list_view_array = GenericListViewArray::<O>::new(
1034 field,
1035 ScalarBuffer::from(self.offsets),
1036 ScalarBuffer::from(sizes),
1037 element_array,
1038 self.nulls.finish(),
1039 );
1040 Ok(Arc::new(list_view_array))
1041 } else {
1042 let list_array = GenericListArray::<O>::new(
1043 field,
1044 OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
1045 element_array,
1046 self.nulls.finish(),
1047 );
1048 Ok(Arc::new(list_array))
1049 }
1050 }
1051}
1052
/// Row builder whose output is itself a [`VariantArray`]: appended variants are
/// stored as variant values rather than converted to another Arrow type.
pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
    /// Metadata column handed, unchanged, to the finished [`VariantArray`].
    metadata: BinaryViewArray,
    /// Accumulates one variant value per appended row (nulls included).
    builder: VariantValueArrayBuilder,
    /// Validity bitmap for the rows, kept in step with `builder`.
    nulls: NullBufferBuilder,
}
1059
1060impl VariantToBinaryVariantArrowRowBuilder {
1061 fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
1062 Self {
1063 metadata,
1064 builder: VariantValueArrayBuilder::new(capacity),
1065 nulls: NullBufferBuilder::new(capacity),
1066 }
1067 }
1068}
1069
1070impl VariantToBinaryVariantArrowRowBuilder {
1071 fn append_null(&mut self) -> Result<()> {
1072 self.builder.append_null();
1073 self.nulls.append_null();
1074 Ok(())
1075 }
1076
1077 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
1078 self.builder.append_value(value);
1079 self.nulls.append_non_null();
1080 Ok(true)
1081 }
1082
1083 fn finish(mut self) -> Result<ArrayRef> {
1084 let variant_array = VariantArray::from_parts(
1085 self.metadata,
1086 Some(self.builder.build()?),
1087 None, self.nulls.finish(),
1089 );
1090
1091 Ok(ArrayRef::from(variant_array))
1092 }
1093}
1094
/// Minimal stand-in for an Arrow array builder that produces a [`NullArray`].
///
/// A null array stores no values, so the only state needed is the number of
/// rows appended so far.
#[derive(Default)]
struct FakeNullBuilder {
    /// Number of rows appended so far (values and nulls are counted alike).
    item_count: usize,
}
1099
1100impl FakeNullBuilder {
1101 fn append_value(&mut self, _: ()) {
1102 self.item_count += 1;
1103 }
1104
1105 fn append_null(&mut self) {
1106 self.item_count += 1;
1107 }
1108
1109 fn finish(self) -> NullArray {
1110 NullArray::new(self.item_count)
1111 }
1112}
1113
// Defines `VariantToNullArrowRowBuilder` through the shared
// `define_variant_to_primitive_builder!` macro.
//
// The arguments are, in order: the struct to declare, a constructor closure that
// ignores the requested capacity and starts from an empty `FakeNullBuilder`, a
// closure that extracts a value via `Variant::as_null`, and the type name
// "Null".
// NOTE(review): the macro body is not visible here — presumably `type_name` is
// used in its cast-failure messages; confirm against the macro definition.
define_variant_to_primitive_builder!(
    struct VariantToNullArrowRowBuilder<'a>
    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
    |value| value.as_null(),
    type_name: "Null"
);
1120
#[cfg(test)]
mod tests {
    use super::{
        make_primitive_variant_to_arrow_row_builder, make_typed_variant_to_arrow_row_builder,
    };
    use arrow::array::{
        Array, Decimal32Array, FixedSizeBinaryArray, Int32Array, ListArray, StructArray,
    };
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use parquet_variant::{Variant, VariantDecimal4};
    use std::sync::Arc;
    use uuid::Uuid;

    /// Cast options with `safe: false`; every other option keeps its default.
    fn strict_cast_options() -> CastOptions<'static> {
        CastOptions {
            safe: false,
            ..Default::default()
        }
    }

    /// The primitive builder factory must reject each of the listed nested
    /// types with an `InvalidArgumentError` whose message names the type.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        // Building blocks for the nested data types under test.
        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
        let struct_fields = Fields::from(vec![Field::new("child", DataType::Int32, true)]);
        let map_entry_fields = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]);
        let map_entries_field = Arc::new(Field::new(
            "entries",
            DataType::Struct(map_entry_fields),
            true,
        ));
        let union_fields =
            UnionFields::try_new(vec![1], vec![Field::new("child", DataType::Int32, true)])
                .unwrap();
        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false));
        let ree_values_field = Arc::new(Field::new("values", DataType::Utf8, true));

        let rejected_types = vec![
            DataType::List(Arc::clone(&item_field)),
            DataType::LargeList(Arc::clone(&item_field)),
            DataType::ListView(Arc::clone(&item_field)),
            DataType::LargeListView(Arc::clone(&item_field)),
            DataType::FixedSizeList(item_field, 2),
            DataType::Struct(struct_fields),
            DataType::Map(map_entries_field, false),
            DataType::Union(union_fields, UnionMode::Dense),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(run_ends_field, ree_values_field),
        ];

        for data_type in rejected_types {
            // `Result::err` is used instead of `unwrap_err` so the success type
            // does not need to implement `Debug`.
            let err = make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 1)
                .err()
                .unwrap_or_else(|| panic!("non-primitive type {data_type:?} should be rejected"));

            if let ArrowError::InvalidArgumentError(msg) = &err {
                assert!(msg.contains(&format!("{data_type:?}")));
            } else {
                panic!("expected InvalidArgumentError, got {err:?}");
            }
        }
    }

    /// With `safe: false`, a variant null still becomes an Arrow null in an
    /// `Int32` builder instead of raising a cast error.
    #[test]
    fn strict_cast_allows_variant_null_for_primitive_builder() {
        let cast_options = strict_cast_options();
        let data_type = DataType::Int32;
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 2).unwrap();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&Variant::Int32(42)).unwrap();
        assert!(value_appended);

        let array = builder.finish().unwrap();
        let ints = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(ints.is_null(0));
        assert_eq!(ints.value(1), 42);
    }

    /// Same check as the primitive case, for a `Decimal32(9, 2)` builder.
    #[test]
    fn strict_cast_allows_variant_null_for_decimal_builder() {
        let cast_options = strict_cast_options();
        let data_type = DataType::Decimal32(9, 2);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 2).unwrap();
        let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&decimal_variant).unwrap();
        assert!(value_appended);

        let array = builder.finish().unwrap();
        let decimals = array.as_any().downcast_ref::<Decimal32Array>().unwrap();
        assert!(decimals.is_null(0));
        assert_eq!(decimals.value(1), 1234);
    }

    /// Same check as the primitive case, for a `FixedSizeBinary(16)` builder
    /// fed with a UUID variant.
    #[test]
    fn strict_cast_allows_variant_null_for_uuid_builder() {
        let cast_options = strict_cast_options();
        let data_type = DataType::FixedSizeBinary(16);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 2).unwrap();
        let uuid = Uuid::nil();

        let null_appended = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_appended);
        let value_appended = builder.append_value(&Variant::Uuid(uuid)).unwrap();
        assert!(value_appended);

        let array = builder.finish().unwrap();
        let uuids = array
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .unwrap();
        assert!(uuids.is_null(0));
        assert_eq!(uuids.value(1), uuid.as_bytes());
    }

    /// With `safe: false`, list and struct builders also turn a variant null
    /// into a null row.
    #[test]
    fn strict_cast_allows_variant_null_for_list_and_struct_builders() {
        let cast_options = strict_cast_options();

        // List builder.
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
        let mut list_builder =
            make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap();
        let list_appended = list_builder.append_value(Variant::Null).unwrap();
        assert!(!list_appended);
        let list_ref = list_builder.finish().unwrap();
        let list_array = list_ref.as_any().downcast_ref::<ListArray>().unwrap();
        assert!(list_array.is_null(0));

        // Struct builder.
        let struct_type =
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)]));
        let mut struct_builder =
            make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap();
        let struct_appended = struct_builder.append_value(Variant::Null).unwrap();
        assert!(!struct_appended);
        let struct_ref = struct_builder.finish().unwrap();
        let struct_array = struct_ref.as_any().downcast_ref::<StructArray>().unwrap();
        assert!(struct_array.is_null(0));
    }
}