1use crate::shred_variant::{
19 NullValue, VariantToShreddedVariantRowBuilder,
20 make_variant_to_shredded_variant_arrow_row_builder,
21};
22use crate::type_conversion::{
23 PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options,
24 variant_to_unscaled_decimal,
25};
26use crate::variant_array::ShreddedVariantFieldArray;
27use crate::{VariantArray, VariantValueArrayBuilder};
28use arrow::array::{
29 ArrayRef, ArrowNativeTypeOp, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewBuilder,
30 BooleanBuilder, FixedSizeBinaryBuilder, FixedSizeListArray, GenericListArray,
31 GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
32 OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
33 StructArray,
34};
35use arrow::buffer::{OffsetBuffer, ScalarBuffer};
36use arrow::compute::{CastOptions, DecimalCast, cast_with_options};
37use arrow::datatypes::{self, DataType, DecimalType};
38use arrow::error::{ArrowError, Result};
39use arrow_schema::{FieldRef, Fields, TimeUnit};
40use parquet_variant::{Variant, VariantPath};
41use std::sync::Arc;
42
/// Row-oriented builder that turns a stream of [`Variant`] values into one Arrow array.
///
/// Each variant wraps the concrete builder for one family of output types; the methods on
/// this enum only forward to the wrapped builder.
pub(crate) enum VariantToArrowRowBuilder<'a> {
    /// Output types handled by [`PrimitiveVariantToArrowRowBuilder`].
    Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
    /// List-like output types (`List`, `LargeList`, `ListView`, `LargeListView`,
    /// `FixedSizeList`).
    Array(ArrayVariantToArrowRowBuilder<'a>),
    /// `Struct` output types.
    Struct(StructVariantToArrowRowBuilder<'a>),
    /// `Dictionary` and `RunEndEncoded` output types.
    Encoded(EncodedVariantToArrowRowBuilder<'a>),
    /// Selected by `make_variant_to_arrow_row_builder` when no target data type is requested.
    BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

    /// Wraps another builder and feeds it the value found at a [`VariantPath`] of each row.
    WithPath(VariantPathRowBuilder<'a>),
}
57
58impl<'a> VariantToArrowRowBuilder<'a> {
59 pub fn append_null(&mut self) -> Result<()> {
60 use VariantToArrowRowBuilder::*;
61 match self {
62 Primitive(b) => b.append_null(),
63 Array(b) => b.append_null(),
64 Struct(b) => b.append_null(),
65 Encoded(b) => b.append_null(),
66 BinaryVariant(b) => b.append_null(),
67 WithPath(path_builder) => path_builder.append_null(),
68 }
69 }
70
71 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
72 use VariantToArrowRowBuilder::*;
73 match self {
74 Primitive(b) => b.append_value(&value),
75 Array(b) => b.append_value(&value),
76 Struct(b) => b.append_value(&value),
77 Encoded(b) => b.append_value(value),
78 BinaryVariant(b) => b.append_value(value),
79 WithPath(path_builder) => path_builder.append_value(value),
80 }
81 }
82
83 pub fn finish(self) -> Result<ArrayRef> {
84 use VariantToArrowRowBuilder::*;
85 match self {
86 Primitive(b) => b.finish(),
87 Array(b) => b.finish(),
88 Struct(b) => b.finish(),
89 Encoded(b) => b.finish(),
90 BinaryVariant(b) => b.finish(),
91 WithPath(path_builder) => path_builder.finish(),
92 }
93 }
94}
95
96fn make_typed_variant_to_arrow_row_builder<'a>(
97 data_type: &'a DataType,
98 cast_options: &'a CastOptions,
99 capacity: usize,
100) -> Result<VariantToArrowRowBuilder<'a>> {
101 use VariantToArrowRowBuilder::*;
102
103 match data_type {
104 DataType::Struct(fields) => {
105 let builder = StructVariantToArrowRowBuilder::try_new(fields, cast_options, capacity)?;
106 Ok(Struct(builder))
107 }
108 data_type @ (DataType::List(_)
109 | DataType::LargeList(_)
110 | DataType::ListView(_)
111 | DataType::LargeListView(_)
112 | DataType::FixedSizeList(..)) => {
113 let builder =
114 ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity, false)?;
115 Ok(Array(builder))
116 }
117 DataType::Dictionary(_, value_type) => {
118 let builder = EncodedVariantToArrowRowBuilder::try_new(
119 data_type,
120 value_type.as_ref(),
121 cast_options,
122 capacity,
123 )?;
124 Ok(Encoded(builder))
125 }
126 DataType::RunEndEncoded(_, value_field) => {
127 let builder = EncodedVariantToArrowRowBuilder::try_new(
128 data_type,
129 value_field.data_type(),
130 cast_options,
131 capacity,
132 )?;
133 Ok(Encoded(builder))
134 }
135 data_type => {
136 let builder =
137 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
138 Ok(Primitive(builder))
139 }
140 }
141}
142
143pub(crate) fn make_variant_to_arrow_row_builder<'a>(
144 metadata: &ArrayRef,
145 path: VariantPath<'a>,
146 data_type: Option<&'a DataType>,
147 cast_options: &'a CastOptions,
148 capacity: usize,
149) -> Result<VariantToArrowRowBuilder<'a>> {
150 use VariantToArrowRowBuilder::*;
151
152 let mut builder = match data_type {
153 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
155 metadata.clone(),
156 capacity,
157 )),
158 Some(data_type) => {
159 make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity)?
160 }
161 };
162
163 if !path.is_empty() {
165 builder = WithPath(VariantPathRowBuilder {
166 builder: Box::new(builder),
167 path,
168 })
169 };
170
171 Ok(builder)
172}
173
/// Row builder for the output types handled by `make_primitive_variant_to_arrow_row_builder`:
/// one variant per supported Arrow type, each wrapping the concrete builder for that type.
pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
    // Null and boolean.
    Null(VariantToNullArrowRowBuilder<'a>),
    Boolean(VariantToBooleanArrowRowBuilder<'a>),
    // Signed and unsigned integers.
    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
    // Floating point.
    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
    // Decimals; precision and scale are stored on the wrapped builder.
    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
    // Timestamps: the `Ntz` variants are used for `DataType::Timestamp(_, None)`, the others
    // for any other timezone value.
    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
    TimestampMilliNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
    ),
    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
    TimestampMicroNtz(
        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
    ),
    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
    // Times and dates.
    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
    // UUIDs, selected for `DataType::FixedSizeBinary(16)`.
    Uuid(VariantToUuidArrowRowBuilder<'a>),
    // Strings (`Utf8`, `LargeUtf8`, `Utf8View`).
    String(VariantToStringArrowBuilder<'a, StringBuilder>),
    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
    // Binary (`Binary`, `LargeBinary`, `BinaryView`).
    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
}
221
222impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
223 pub fn append_null(&mut self) -> Result<()> {
224 use PrimitiveVariantToArrowRowBuilder::*;
225 match self {
226 Null(b) => b.append_null(),
227 Boolean(b) => b.append_null(),
228 Int8(b) => b.append_null(),
229 Int16(b) => b.append_null(),
230 Int32(b) => b.append_null(),
231 Int64(b) => b.append_null(),
232 UInt8(b) => b.append_null(),
233 UInt16(b) => b.append_null(),
234 UInt32(b) => b.append_null(),
235 UInt64(b) => b.append_null(),
236 Float16(b) => b.append_null(),
237 Float32(b) => b.append_null(),
238 Float64(b) => b.append_null(),
239 Decimal32(b) => b.append_null(),
240 Decimal64(b) => b.append_null(),
241 Decimal128(b) => b.append_null(),
242 Decimal256(b) => b.append_null(),
243 TimestampSecond(b) => b.append_null(),
244 TimestampSecondNtz(b) => b.append_null(),
245 TimestampMilli(b) => b.append_null(),
246 TimestampMilliNtz(b) => b.append_null(),
247 TimestampMicro(b) => b.append_null(),
248 TimestampMicroNtz(b) => b.append_null(),
249 TimestampNano(b) => b.append_null(),
250 TimestampNanoNtz(b) => b.append_null(),
251 Time32Second(b) => b.append_null(),
252 Time32Milli(b) => b.append_null(),
253 Time64Micro(b) => b.append_null(),
254 Time64Nano(b) => b.append_null(),
255 Date32(b) => b.append_null(),
256 Date64(b) => b.append_null(),
257 Uuid(b) => b.append_null(),
258 String(b) => b.append_null(),
259 LargeString(b) => b.append_null(),
260 StringView(b) => b.append_null(),
261 Binary(b) => b.append_null(),
262 LargeBinary(b) => b.append_null(),
263 BinaryView(b) => b.append_null(),
264 }
265 }
266
267 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
268 use PrimitiveVariantToArrowRowBuilder::*;
269 match self {
270 Null(b) => b.append_value(value),
271 Boolean(b) => b.append_value(value),
272 Int8(b) => b.append_value(value),
273 Int16(b) => b.append_value(value),
274 Int32(b) => b.append_value(value),
275 Int64(b) => b.append_value(value),
276 UInt8(b) => b.append_value(value),
277 UInt16(b) => b.append_value(value),
278 UInt32(b) => b.append_value(value),
279 UInt64(b) => b.append_value(value),
280 Float16(b) => b.append_value(value),
281 Float32(b) => b.append_value(value),
282 Float64(b) => b.append_value(value),
283 Decimal32(b) => b.append_value(value),
284 Decimal64(b) => b.append_value(value),
285 Decimal128(b) => b.append_value(value),
286 Decimal256(b) => b.append_value(value),
287 TimestampSecond(b) => b.append_value(value),
288 TimestampSecondNtz(b) => b.append_value(value),
289 TimestampMilli(b) => b.append_value(value),
290 TimestampMilliNtz(b) => b.append_value(value),
291 TimestampMicro(b) => b.append_value(value),
292 TimestampMicroNtz(b) => b.append_value(value),
293 TimestampNano(b) => b.append_value(value),
294 TimestampNanoNtz(b) => b.append_value(value),
295 Time32Second(b) => b.append_value(value),
296 Time32Milli(b) => b.append_value(value),
297 Time64Micro(b) => b.append_value(value),
298 Time64Nano(b) => b.append_value(value),
299 Date32(b) => b.append_value(value),
300 Date64(b) => b.append_value(value),
301 Uuid(b) => b.append_value(value),
302 String(b) => b.append_value(value),
303 LargeString(b) => b.append_value(value),
304 StringView(b) => b.append_value(value),
305 Binary(b) => b.append_value(value),
306 LargeBinary(b) => b.append_value(value),
307 BinaryView(b) => b.append_value(value),
308 }
309 }
310
311 pub fn finish(self) -> Result<ArrayRef> {
312 use PrimitiveVariantToArrowRowBuilder::*;
313 match self {
314 Null(b) => b.finish(),
315 Boolean(b) => b.finish(),
316 Int8(b) => b.finish(),
317 Int16(b) => b.finish(),
318 Int32(b) => b.finish(),
319 Int64(b) => b.finish(),
320 UInt8(b) => b.finish(),
321 UInt16(b) => b.finish(),
322 UInt32(b) => b.finish(),
323 UInt64(b) => b.finish(),
324 Float16(b) => b.finish(),
325 Float32(b) => b.finish(),
326 Float64(b) => b.finish(),
327 Decimal32(b) => b.finish(),
328 Decimal64(b) => b.finish(),
329 Decimal128(b) => b.finish(),
330 Decimal256(b) => b.finish(),
331 TimestampSecond(b) => b.finish(),
332 TimestampSecondNtz(b) => b.finish(),
333 TimestampMilli(b) => b.finish(),
334 TimestampMilliNtz(b) => b.finish(),
335 TimestampMicro(b) => b.finish(),
336 TimestampMicroNtz(b) => b.finish(),
337 TimestampNano(b) => b.finish(),
338 TimestampNanoNtz(b) => b.finish(),
339 Time32Second(b) => b.finish(),
340 Time32Milli(b) => b.finish(),
341 Time64Micro(b) => b.finish(),
342 Time64Nano(b) => b.finish(),
343 Date32(b) => b.finish(),
344 Date64(b) => b.finish(),
345 Uuid(b) => b.finish(),
346 String(b) => b.finish(),
347 LargeString(b) => b.finish(),
348 StringView(b) => b.finish(),
349 Binary(b) => b.finish(),
350 LargeBinary(b) => b.finish(),
351 BinaryView(b) => b.finish(),
352 }
353 }
354}
355
356pub(crate) struct EncodedVariantToArrowRowBuilder<'a> {
357 data_type: &'a DataType,
358 cast_options: &'a CastOptions<'a>,
359 values_builder: Box<VariantToArrowRowBuilder<'a>>,
360}
361
362impl<'a> EncodedVariantToArrowRowBuilder<'a> {
363 fn try_new(
364 data_type: &'a DataType,
365 value_type: &'a DataType,
366 cast_options: &'a CastOptions,
367 capacity: usize,
368 ) -> Result<Self> {
369 let values_builder = Box::new(make_typed_variant_to_arrow_row_builder(
370 value_type,
371 cast_options,
372 capacity,
373 )?);
374 Ok(Self {
375 data_type,
376 cast_options,
377 values_builder,
378 })
379 }
380
381 fn append_null(&mut self) -> Result<()> {
382 self.values_builder.append_null()
383 }
384
385 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
386 self.values_builder.append_value(value)
387 }
388
389 fn finish(self) -> Result<ArrayRef> {
390 let values = self.values_builder.finish()?;
391 cast_with_options(values.as_ref(), self.data_type, self.cast_options)
392 }
393}
394
395pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
397 data_type: &'a DataType,
398 cast_options: &'a CastOptions,
399 capacity: usize,
400) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
401 use PrimitiveVariantToArrowRowBuilder::*;
402
403 let builder =
404 match data_type {
405 DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
406 DataType::Boolean => {
407 Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
408 }
409 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
410 cast_options,
411 capacity,
412 )),
413 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
414 cast_options,
415 capacity,
416 )),
417 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
418 cast_options,
419 capacity,
420 )),
421 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
422 cast_options,
423 capacity,
424 )),
425 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
426 cast_options,
427 capacity,
428 )),
429 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
430 cast_options,
431 capacity,
432 )),
433 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
434 cast_options,
435 capacity,
436 )),
437 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
438 cast_options,
439 capacity,
440 )),
441 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
442 cast_options,
443 capacity,
444 )),
445 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
446 cast_options,
447 capacity,
448 )),
449 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
450 cast_options,
451 capacity,
452 )),
453 DataType::Decimal32(precision, scale) => Decimal32(
454 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
455 ),
456 DataType::Decimal64(precision, scale) => Decimal64(
457 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
458 ),
459 DataType::Decimal128(precision, scale) => Decimal128(
460 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
461 ),
462 DataType::Decimal256(precision, scale) => Decimal256(
463 VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
464 ),
465 DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
466 cast_options,
467 capacity,
468 )),
469 DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
470 cast_options,
471 capacity,
472 )),
473 DataType::Time32(TimeUnit::Second) => Time32Second(
474 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
475 ),
476 DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
477 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
478 ),
479 DataType::Time32(t) => {
480 return Err(ArrowError::InvalidArgumentError(format!(
481 "The unit for Time32 must be second/millisecond, received {t:?}"
482 )));
483 }
484 DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
485 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
486 ),
487 DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
488 VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
489 ),
490 DataType::Time64(t) => {
491 return Err(ArrowError::InvalidArgumentError(format!(
492 "The unit for Time64 must be micro/nano seconds, received {t:?}"
493 )));
494 }
495 DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
496 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
497 ),
498 DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
499 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
500 ),
501 DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
502 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
503 ),
504 DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
505 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
506 ),
507 DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
508 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
509 ),
510 DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
511 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
512 ),
513 DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
514 VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
515 ),
516 DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
517 VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
518 ),
519 DataType::Duration(_) | DataType::Interval(_) => {
520 return Err(ArrowError::InvalidArgumentError(
521 "Casting Variant to duration/interval types is not supported. \
522 The Variant format does not define duration/interval types."
523 .to_string(),
524 ));
525 }
526 DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
527 DataType::LargeBinary => {
528 LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
529 }
530 DataType::BinaryView => {
531 BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
532 }
533 DataType::FixedSizeBinary(16) => {
534 Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
535 }
536 DataType::FixedSizeBinary(_) => {
537 return Err(ArrowError::NotYetImplemented(format!(
538 "DataType {data_type:?} not yet implemented"
539 )));
540 }
541 DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
542 DataType::LargeUtf8 => {
543 LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
544 }
545 DataType::Utf8View => {
546 StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
547 }
548 DataType::List(_)
549 | DataType::LargeList(_)
550 | DataType::ListView(_)
551 | DataType::LargeListView(_)
552 | DataType::FixedSizeList(..)
553 | DataType::Struct(_)
554 | DataType::Map(..)
555 | DataType::Union(..)
556 | DataType::Dictionary(..)
557 | DataType::RunEndEncoded(..) => {
558 return Err(ArrowError::InvalidArgumentError(format!(
559 "Casting to {data_type:?} is not applicable for primitive Variant types"
560 )));
561 }
562 };
563 Ok(builder)
564}
565
/// Row builder for list-like Arrow output types.
///
/// The offset-based kinds share [`VariantToListArrowRowBuilder`], parameterised by offset width
/// (`i32` / `i64`) and by whether the output is a view type (the `bool` const parameter).
pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
    /// `DataType::List`.
    List(VariantToListArrowRowBuilder<'a, i32, false>),
    /// `DataType::LargeList`.
    LargeList(VariantToListArrowRowBuilder<'a, i64, false>),
    /// `DataType::ListView`.
    ListView(VariantToListArrowRowBuilder<'a, i32, true>),
    /// `DataType::LargeListView`.
    LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
    /// `DataType::FixedSizeList`.
    FixedSizeList(VariantToFixedSizeListArrowRowBuilder<'a>),
}
573
574pub(crate) struct StructVariantToArrowRowBuilder<'a> {
575 fields: &'a Fields,
576 field_builders: Vec<VariantToArrowRowBuilder<'a>>,
577 nulls: NullBufferBuilder,
578 cast_options: &'a CastOptions<'a>,
579}
580
581impl<'a> StructVariantToArrowRowBuilder<'a> {
582 fn try_new(
583 fields: &'a Fields,
584 cast_options: &'a CastOptions<'a>,
585 capacity: usize,
586 ) -> Result<Self> {
587 let mut field_builders = Vec::with_capacity(fields.len());
588 for field in fields.iter() {
589 field_builders.push(make_typed_variant_to_arrow_row_builder(
590 field.data_type(),
591 cast_options,
592 capacity,
593 )?);
594 }
595 Ok(Self {
596 fields,
597 field_builders,
598 nulls: NullBufferBuilder::new(capacity),
599 cast_options,
600 })
601 }
602
603 fn append_null(&mut self) -> Result<()> {
604 for builder in &mut self.field_builders {
605 builder.append_null()?;
606 }
607 self.nulls.append_null();
608 Ok(())
609 }
610
611 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
612 match variant_cast_with_options(value, self.cast_options, Variant::as_object) {
613 Ok(Some(obj)) => {
614 for (index, field) in self.fields.iter().enumerate() {
615 match obj.get(field.name()) {
616 Some(field_value) => {
617 self.field_builders[index].append_value(field_value)?;
618 }
619 None => {
620 self.field_builders[index].append_null()?;
621 }
622 }
623 }
624
625 self.nulls.append_non_null();
626 Ok(true)
627 }
628 Ok(None) => {
629 self.append_null()?;
630 Ok(false)
631 }
632 Err(_) => Err(ArrowError::CastError(format!(
633 "Failed to extract struct from variant {value:?}"
634 ))),
635 }
636 }
637
638 fn finish(mut self) -> Result<ArrayRef> {
639 let mut children = Vec::with_capacity(self.field_builders.len());
640 for builder in self.field_builders {
641 children.push(builder.finish()?);
642 }
643 Ok(Arc::new(StructArray::try_new(
644 self.fields.clone(),
645 children,
646 self.nulls.finish(),
647 )?))
648 }
649}
650
651impl<'a> ArrayVariantToArrowRowBuilder<'a> {
652 pub(crate) fn try_new(
659 data_type: &'a DataType,
660 cast_options: &'a CastOptions,
661 capacity: usize,
662 shredded: bool,
663 ) -> Result<Self> {
664 use ArrayVariantToArrowRowBuilder::*;
665
666 macro_rules! make_list_builder {
668 ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
669 $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
670 $field.clone(),
671 $field.data_type(),
672 cast_options,
673 capacity,
674 shredded,
675 )?)
676 };
677 }
678
679 let builder = match data_type {
680 DataType::List(field) => make_list_builder!(List, i32, false, field),
681 DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
682 DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
683 DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
684 DataType::FixedSizeList(field, size) => {
685 FixedSizeList(VariantToFixedSizeListArrowRowBuilder::try_new(
686 field.clone(),
687 field.data_type(),
688 *size,
689 cast_options,
690 capacity,
691 shredded,
692 )?)
693 }
694 other => {
695 return Err(ArrowError::InvalidArgumentError(format!(
696 "Casting to {other:?} is not applicable for array Variant types"
697 )));
698 }
699 };
700 Ok(builder)
701 }
702
703 pub(crate) fn append_null(&mut self) -> Result<()> {
704 match self {
705 Self::List(builder) => builder.append_null(),
706 Self::LargeList(builder) => builder.append_null(),
707 Self::ListView(builder) => builder.append_null(),
708 Self::LargeListView(builder) => builder.append_null(),
709 Self::FixedSizeList(builder) => builder.append_null(),
710 }
711 }
712
713 pub(crate) fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
714 match self {
715 Self::List(builder) => builder.append_value(value),
716 Self::LargeList(builder) => builder.append_value(value),
717 Self::ListView(builder) => builder.append_value(value),
718 Self::LargeListView(builder) => builder.append_value(value),
719 Self::FixedSizeList(builder) => builder.append_value(value),
720 }
721 }
722
723 pub(crate) fn finish(self) -> Result<ArrayRef> {
724 match self {
725 Self::List(builder) => builder.finish(),
726 Self::LargeList(builder) => builder.finish(),
727 Self::ListView(builder) => builder.finish(),
728 Self::LargeListView(builder) => builder.finish(),
729 Self::FixedSizeList(builder) => builder.finish(),
730 }
731 }
732}
733
734pub(crate) struct VariantPathRowBuilder<'a> {
737 builder: Box<VariantToArrowRowBuilder<'a>>,
738 path: VariantPath<'a>,
739}
740
741impl<'a> VariantPathRowBuilder<'a> {
742 fn append_null(&mut self) -> Result<()> {
743 self.builder.append_null()
744 }
745
746 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
747 if let Some(v) = value.get_path(&self.path) {
748 self.builder.append_value(v)
749 } else {
750 self.builder.append_null()?;
751 Ok(false)
752 }
753 }
754
755 fn finish(self) -> Result<ArrayRef> {
756 self.builder.finish()
757 }
758}
759
// Generates a row-builder struct that wraps an Arrow array builder and fills it from
// `Variant` values.
//
// Macro arguments, in order:
// * `struct Name<'a [, G: Bound]>` - name of the generated struct, its lifetime and an optional
//   generic parameter with a single bound.
// * `|capacity [, field: Type]| -> Builder[<T>] { init }` - names the capacity parameter of the
//   generated `new`, an optional extra `new` parameter, the wrapped builder type, and the
//   expression that constructs the builder.
// * `|value| transform` - expression extracting the value to append from a `&Variant`; it is
//   run through `variant_cast_with_options`.
// * `type_name: expr` - expression formatted into the cast error message.
//
// The generated type has private `new`, `append_null`, `append_value` and `finish` methods.
macro_rules! define_variant_to_primitive_builder {
    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
    |$value: ident| $value_transform:expr,
    type_name: $type_name:expr) => {
        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
        {
            builder: $builder_name $(<$array_type>)?,
            cast_options: &$lifetime CastOptions<$lifetime>,
        }

        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
            fn new(
                cast_options: &$lifetime CastOptions<$lifetime>,
                $array_param: usize,
                $( $field: $field_type, )?
            ) -> Self {
                Self {
                    builder: $init_expr,
                    cast_options,
                }
            }

            fn append_null(&mut self) -> Result<()> {
                self.builder.append_null();
                Ok(())
            }

            // Returns `Ok(true)` when a value was appended and `Ok(false)` when the cast
            // yielded `None` and a null was appended instead.
            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
                match variant_cast_with_options(
                    $value,
                    self.cast_options,
                    |$value| $value_transform,
                ) {
                    Ok(Some(v)) => {
                        self.builder.append_value(v);
                        Ok(true)
                    }
                    Ok(None) => {
                        self.builder.append_null();
                        Ok(false)
                    }
                    // The underlying error is dropped; the path in the message is always the
                    // empty `VariantPath([])`.
                    Err(_) => Err(ArrowError::CastError(format!(
                        "Failed to extract primitive of type {type_name} from variant {value:?} at path VariantPath([])",
                        type_name = $type_name,
                        value = $value
                    ))),
                }
            }

            // NOTE(review): presumably `mut` is unused for some wrapped builder types, hence
            // the `allow` - confirm against the builder traits.
            #[allow(unused_mut)]
            fn finish(mut self) -> Result<ArrayRef> {
                Ok(Arc::from(self.builder.finish()))
            }
        }
    }
}
823
// String row builder: extracts values with `Variant::as_string` and appends them to any
// `StringLikeArrayBuilder` (`PrimitiveVariantToArrowRowBuilder` instantiates it with
// `StringBuilder`, `LargeStringBuilder` and `StringViewBuilder`).
define_variant_to_primitive_builder!(
    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_string(),
    type_name: B::type_name()
);
830
// Boolean row builder: extracts values with `Variant::as_boolean` into a `BooleanBuilder`.
define_variant_to_primitive_builder!(
    struct VariantToBooleanArrowRowBuilder<'a>
    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
    |value| value.as_boolean(),
    type_name: datatypes::BooleanType::DATA_TYPE
);
837
// Generic row builder for Arrow primitive types: conversion is delegated to
// `PrimitiveFromVariant::from_variant` and values go into a `PrimitiveBuilder<T>`.
define_variant_to_primitive_builder!(
    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
844
// Row builder for timestamps without a timezone (`DataType::Timestamp(_, None)`); conversion
// uses the `TimestampFromVariant<true>` implementation of `T`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
851
// Row builder for timestamps whose data type carries a timezone: `new` takes the extra `tz`
// argument and applies it to the builder; conversion uses the `TimestampFromVariant<false>`
// implementation of `T`.
define_variant_to_primitive_builder!(
    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
    },
    |value| T::from_variant(value),
    type_name: T::DATA_TYPE
);
860
// Binary row builder: extracts values with `Variant::as_u8_slice` and appends them to any
// `BinaryLikeArrayBuilder` (`PrimitiveVariantToArrowRowBuilder` instantiates it with
// `BinaryBuilder`, `LargeBinaryBuilder` and `BinaryViewBuilder`).
define_variant_to_primitive_builder!(
    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
    |capacity| -> B { B::with_capacity(capacity) },
    |value| value.as_u8_slice(),
    type_name: B::type_name()
);
867
868pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
870where
871 T: DecimalType,
872 T::Native: DecimalCast,
873{
874 builder: PrimitiveBuilder<T>,
875 cast_options: &'a CastOptions<'a>,
876 precision: u8,
877 scale: i8,
878}
879
880impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
881where
882 T: DecimalType,
883 T::Native: DecimalCast,
884{
885 fn new(
886 cast_options: &'a CastOptions<'a>,
887 capacity: usize,
888 precision: u8,
889 scale: i8,
890 ) -> Result<Self> {
891 let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
892 .with_precision_and_scale(precision, scale)?;
893 Ok(Self {
894 builder,
895 cast_options,
896 precision,
897 scale,
898 })
899 }
900
901 fn append_null(&mut self) -> Result<()> {
902 self.builder.append_null();
903 Ok(())
904 }
905
906 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
907 match variant_cast_with_options(value, self.cast_options, |value| {
908 variant_to_unscaled_decimal::<T>(value, self.precision, self.scale)
909 }) {
910 Ok(Some(scaled)) => {
911 self.builder.append_value(scaled);
912 Ok(true)
913 }
914 Ok(None) => {
915 self.builder.append_null();
916 Ok(false)
917 }
918 Err(_) => Err(ArrowError::CastError(format!(
919 "Failed to cast to {prefix}(precision={precision}, scale={scale}) from variant {value:?}",
920 prefix = T::PREFIX,
921 precision = self.precision,
922 scale = self.scale
923 ))),
924 }
925 }
926
927 fn finish(mut self) -> Result<ArrayRef> {
928 Ok(Arc::new(self.builder.finish()))
929 }
930}
931
932pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
934 builder: FixedSizeBinaryBuilder,
935 cast_options: &'a CastOptions<'a>,
936}
937
938impl<'a> VariantToUuidArrowRowBuilder<'a> {
939 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
940 Self {
941 builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
942 cast_options,
943 }
944 }
945
946 fn append_null(&mut self) -> Result<()> {
947 self.builder.append_null();
948 Ok(())
949 }
950
951 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
952 match variant_cast_with_options(value, self.cast_options, Variant::as_uuid) {
953 Ok(Some(uuid)) => {
954 self.builder
955 .append_value(uuid.as_bytes())
956 .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
957 Ok(true)
958 }
959 Ok(None) => {
960 self.builder.append_null();
961 Ok(false)
962 }
963 Err(_) => Err(ArrowError::CastError(format!(
964 "Failed to extract UUID from variant {value:?}"
965 ))),
966 }
967 }
968
969 fn finish(mut self) -> Result<ArrayRef> {
970 Ok(Arc::new(self.builder.finish()))
971 }
972}
973
974enum ListElementBuilder<'a> {
977 Typed(Box<VariantToArrowRowBuilder<'a>>),
979 Shredded(Box<VariantToShreddedVariantRowBuilder<'a>>),
981}
982
983impl<'a> ListElementBuilder<'a> {
984 fn append_null(&mut self) -> Result<()> {
985 match self {
986 Self::Typed(b) => b.append_null(),
987 Self::Shredded(b) => b.append_null(),
988 }
989 }
990
991 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
992 match self {
993 Self::Typed(b) => b.append_value(value),
994 Self::Shredded(b) => b.append_value(value),
995 }
996 }
997
998 fn finish(self) -> Result<ArrayRef> {
999 match self {
1000 Self::Typed(b) => b.finish(),
1001 Self::Shredded(b) => {
1002 let (value, typed_value, nulls) = b.finish()?;
1003 Ok(ArrayRef::from(ShreddedVariantFieldArray::from_parts(
1004 Some(Arc::new(value)),
1005 Some(typed_value),
1006 nulls,
1007 )))
1008 }
1009 }
1010 }
1011}
1012
/// Row builder for offset-based list arrays, generic over the offset type `O`.
///
/// `IS_VIEW` is `true` for the `ListView` / `LargeListView` variants of
/// `ArrayVariantToArrowRowBuilder` and `false` for `List` / `LargeList`.
// NOTE(review): how `IS_VIEW` changes the finished array is not shown in this part of the
// file - confirm in `finish`.
pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
where
    O: OffsetSizeTrait + ArrowNativeTypeOp,
{
    /// Element field passed to the constructor.
    field: FieldRef,
    /// Starts with a single `O::ZERO`; the value of `current_offset` is pushed after every
    /// appended row.
    offsets: Vec<O>,
    /// Builder receiving every list element.
    element_builder: ListElementBuilder<'a>,
    /// Validity of the list rows themselves.
    nulls: NullBufferBuilder,
    /// Running count of elements appended so far.
    current_offset: O,
    cast_options: &'a CastOptions<'a>,
}
1024
1025impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
1026where
1027 O: OffsetSizeTrait + ArrowNativeTypeOp,
1028{
1029 fn try_new(
1030 field: FieldRef,
1031 element_data_type: &'a DataType,
1032 cast_options: &'a CastOptions,
1033 capacity: usize,
1034 shredded: bool,
1035 ) -> Result<Self> {
1036 if capacity >= isize::MAX as usize {
1037 return Err(ArrowError::ComputeError(
1038 "Capacity exceeds isize::MAX when reserving list offsets".to_string(),
1039 ));
1040 }
1041 let mut offsets = Vec::with_capacity(capacity + 1);
1042 offsets.push(O::ZERO);
1043 let element_builder = if shredded {
1044 let builder = make_variant_to_shredded_variant_arrow_row_builder(
1045 element_data_type,
1046 cast_options,
1047 capacity,
1048 NullValue::ArrayElement,
1049 )?;
1050 ListElementBuilder::Shredded(Box::new(builder))
1051 } else {
1052 let builder =
1053 make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?;
1054 ListElementBuilder::Typed(Box::new(builder))
1055 };
1056
1057 Ok(Self {
1058 field,
1059 offsets,
1060 element_builder,
1061 nulls: NullBufferBuilder::new(capacity),
1062 current_offset: O::ZERO,
1063 cast_options,
1064 })
1065 }
1066
1067 fn append_null(&mut self) -> Result<()> {
1068 self.offsets.push(self.current_offset);
1069 self.nulls.append_null();
1070 Ok(())
1071 }
1072
1073 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
1074 match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
1075 Ok(Some(list)) => {
1076 for element in list.iter() {
1077 self.element_builder.append_value(element)?;
1078 self.current_offset = self.current_offset.add_checked(O::ONE)?;
1079 }
1080 self.offsets.push(self.current_offset);
1081 self.nulls.append_non_null();
1082 Ok(true)
1083 }
1084 Ok(None) => {
1085 self.append_null()?;
1086 Ok(false)
1087 }
1088 Err(_) => Err(ArrowError::CastError(format!(
1089 "Failed to extract list from variant {value:?}"
1090 ))),
1091 }
1092 }
1093
1094 fn finish(mut self) -> Result<ArrayRef> {
1095 let element_array: ArrayRef = self.element_builder.finish()?;
1096 let field = Arc::new(
1097 self.field
1098 .as_ref()
1099 .clone()
1100 .with_data_type(element_array.data_type().clone()),
1101 );
1102
1103 if IS_VIEW {
1104 let mut sizes = Vec::with_capacity(self.offsets.len() - 1);
1106 for i in 1..self.offsets.len() {
1107 sizes.push(self.offsets[i] - self.offsets[i - 1]);
1108 }
1109 self.offsets.pop();
1110 let list_view_array = GenericListViewArray::<O>::new(
1111 field,
1112 ScalarBuffer::from(self.offsets),
1113 ScalarBuffer::from(sizes),
1114 element_array,
1115 self.nulls.finish(),
1116 );
1117 Ok(Arc::new(list_view_array))
1118 } else {
1119 let list_array = GenericListArray::<O>::new(
1120 field,
1121 OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
1122 element_array,
1123 self.nulls.finish(),
1124 );
1125 Ok(Arc::new(list_array))
1126 }
1127 }
1128}
1129
/// Row builder that converts variant lists into an Arrow `FixedSizeListArray`.
pub(crate) struct VariantToFixedSizeListArrowRowBuilder<'a> {
    /// Element field; `finish` clones it and replaces its data type with the data type of the
    /// finished element array.
    field: FieldRef,
    /// Number of elements every row must have.
    list_size: i32,
    /// Receives the elements of every appended list (and `list_size` nulls for a null row).
    element_builder: ListElementBuilder<'a>,
    /// Row-level validity.
    nulls: NullBufferBuilder,
    /// Options passed to `variant_cast_with_options`; `safe` also controls how a length mismatch
    /// is handled.
    cast_options: &'a CastOptions<'a>,
    /// When `true`, a list of the wrong length is always an error, even with safe casting.
    shredded: bool,
}
1138
1139impl<'a> VariantToFixedSizeListArrowRowBuilder<'a> {
1140 fn try_new(
1141 field: FieldRef,
1142 element_data_type: &'a DataType,
1143 list_size: i32,
1144 cast_options: &'a CastOptions,
1145 capacity: usize,
1146 shredded: bool,
1147 ) -> Result<Self> {
1148 let element_builder = if shredded {
1149 let builder = make_variant_to_shredded_variant_arrow_row_builder(
1150 element_data_type,
1151 cast_options,
1152 capacity,
1153 NullValue::ArrayElement,
1154 )?;
1155 ListElementBuilder::Shredded(Box::new(builder))
1156 } else {
1157 let builder =
1158 make_typed_variant_to_arrow_row_builder(element_data_type, cast_options, capacity)?;
1159 ListElementBuilder::Typed(Box::new(builder))
1160 };
1161 Ok(Self {
1162 field,
1163 list_size,
1164 element_builder,
1165 nulls: NullBufferBuilder::new(capacity),
1166 cast_options,
1167 shredded,
1168 })
1169 }
1170
1171 fn append_null(&mut self) -> Result<()> {
1172 for _ in 0..self.list_size {
1173 self.element_builder.append_null()?;
1174 }
1175 self.nulls.append_null();
1176 Ok(())
1177 }
1178
1179 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
1180 match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
1181 Ok(Some(list)) => {
1182 let len = list.len();
1183 if len != self.list_size as usize {
1184 if self.cast_options.safe && !self.shredded {
1185 self.append_null()?;
1186 return Ok(false);
1187 }
1188 return Err(ArrowError::CastError(format!(
1189 "Expected fixed size list of size {}, got size {}",
1190 self.list_size, len
1191 )));
1192 }
1193 for element in list.iter() {
1194 self.element_builder.append_value(element)?;
1195 }
1196 self.nulls.append_non_null();
1197 Ok(true)
1198 }
1199 Ok(None) => {
1200 self.append_null()?;
1201 Ok(false)
1202 }
1203 Err(_) => Err(ArrowError::CastError(format!(
1204 "Failed to extract list from variant {value:?}"
1205 ))),
1206 }
1207 }
1208
1209 fn finish(mut self) -> Result<ArrayRef> {
1210 let element_array: ArrayRef = self.element_builder.finish()?;
1211 let field = Arc::new(
1212 self.field
1213 .as_ref()
1214 .clone()
1215 .with_data_type(element_array.data_type().clone()),
1216 );
1217 let fixed_size_list_array =
1218 FixedSizeListArray::try_new(field, self.list_size, element_array, self.nulls.finish())?;
1219 Ok(Arc::new(fixed_size_list_array))
1220 }
1221}
1222
/// Row builder whose `finish` produces a `VariantArray` holding only a `value` column (no
/// `typed_value`).
pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
    /// Metadata array handed to `VariantArray::from_parts` at `finish` time.
    metadata: ArrayRef,
    /// Collects the appended variant values.
    builder: VariantValueArrayBuilder,
    /// Row-level validity.
    nulls: NullBufferBuilder,
}
1229
1230impl VariantToBinaryVariantArrowRowBuilder {
1231 fn new(metadata: ArrayRef, capacity: usize) -> Self {
1232 Self {
1233 metadata,
1234 builder: VariantValueArrayBuilder::new(capacity),
1235 nulls: NullBufferBuilder::new(capacity),
1236 }
1237 }
1238}
1239
1240impl VariantToBinaryVariantArrowRowBuilder {
1241 fn append_null(&mut self) -> Result<()> {
1242 self.builder.append_null();
1243 self.nulls.append_null();
1244 Ok(())
1245 }
1246
1247 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
1248 self.builder.append_value(value);
1249 self.nulls.append_non_null();
1250 Ok(true)
1251 }
1252
1253 fn finish(mut self) -> Result<ArrayRef> {
1254 let variant_array = VariantArray::from_parts(
1255 self.metadata,
1256 Some(Arc::new(self.builder.build()?)),
1257 None, self.nulls.finish(),
1259 );
1260
1261 Ok(ArrayRef::from(variant_array))
1262 }
1263}
1264
/// Stand-in "builder" for null arrays: it stores no data and only counts appended rows, which
/// is all `finish` needs to create a `NullArray`.
#[derive(Default)]
struct FakeNullBuilder {
    /// Number of rows appended so far (values and nulls alike).
    item_count: usize,
}
1269
1270impl FakeNullBuilder {
1271 fn append_value(&mut self, _: ()) {
1272 self.item_count += 1;
1273 }
1274
1275 fn append_null(&mut self) {
1276 self.item_count += 1;
1277 }
1278
1279 fn finish(self) -> NullArray {
1280 NullArray::new(self.item_count)
1281 }
1282}
1283
// Declares `VariantToNullArrowRowBuilder` through `define_variant_to_primitive_builder!`.
// - The capacity argument is ignored: the backing `FakeNullBuilder` only counts rows.
// - Each value is converted by calling `as_null()` on it.
// - "Null" is passed as the type name (presumably used in the generated error messages;
//   confirm against the macro definition).
define_variant_to_primitive_builder!(
    struct VariantToNullArrowRowBuilder<'a>
    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() },
    |value| value.as_null(),
    type_name: "Null"
);
1290
#[cfg(test)]
mod tests {
    use super::{
        make_primitive_variant_to_arrow_row_builder, make_typed_variant_to_arrow_row_builder,
    };
    use arrow::array::{
        Array, Decimal32Array, FixedSizeBinaryArray, Int32Array, ListArray, StructArray,
    };
    use arrow::compute::CastOptions;
    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
    use arrow::error::ArrowError;
    use parquet_variant::{Variant, VariantDecimal4};
    use std::sync::Arc;
    use uuid::Uuid;

    /// Cast options with `safe: false`, shared by the strict-cast tests below.
    fn strict_cast_options() -> CastOptions<'static> {
        CastOptions {
            safe: false,
            ..Default::default()
        }
    }

    /// The primitive builder factory must refuse every nested or encoded data type and name
    /// the offending type in its `InvalidArgumentError`.
    #[test]
    fn make_primitive_builder_rejects_non_primitive_types() {
        let cast_options = CastOptions::default();

        let int_item = Arc::new(Field::new("item", DataType::Int32, true));
        let struct_children = Fields::from(vec![Field::new("child", DataType::Int32, true)]);
        let map_entry_children = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Float64, true),
        ]);
        let map_entries = Arc::new(Field::new(
            "entries",
            DataType::Struct(map_entry_children),
            true,
        ));
        let union_children =
            UnionFields::try_new(vec![1], vec![Field::new("child", DataType::Int32, true)])
                .unwrap();
        let run_ends = Arc::new(Field::new("run_ends", DataType::Int32, false));
        let ree_values = Arc::new(Field::new("values", DataType::Utf8, true));

        let rejected_types = [
            DataType::List(int_item.clone()),
            DataType::LargeList(int_item.clone()),
            DataType::ListView(int_item.clone()),
            DataType::LargeListView(int_item.clone()),
            DataType::FixedSizeList(int_item, 2),
            DataType::Struct(struct_children),
            DataType::Map(map_entries, false),
            DataType::Union(union_children, UnionMode::Dense),
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::RunEndEncoded(run_ends, ree_values),
        ];

        for data_type in &rejected_types {
            let err = make_primitive_variant_to_arrow_row_builder(data_type, &cast_options, 1)
                .err()
                .unwrap_or_else(|| {
                    panic!("non-primitive type {data_type:?} should be rejected")
                });

            match err {
                ArrowError::InvalidArgumentError(msg) => {
                    let rendered_type = format!("{data_type:?}");
                    assert!(msg.contains(&rendered_type));
                }
                other => panic!("expected InvalidArgumentError, got {other:?}"),
            }
        }
    }

    /// With strict casting, `Variant::Null` still becomes a null `Int32` slot.
    #[test]
    fn strict_cast_allows_variant_null_for_primitive_builder() {
        let cast_options = strict_cast_options();
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2)
                .unwrap();

        let null_was_value = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_was_value);
        let int_was_value = builder.append_value(&Variant::Int32(42)).unwrap();
        assert!(int_was_value);

        let finished = builder.finish().unwrap();
        let ints = finished.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(ints.is_null(0));
        assert_eq!(ints.value(1), 42);
    }

    /// With strict casting, `Variant::Null` still becomes a null `Decimal32` slot.
    #[test]
    fn strict_cast_allows_variant_null_for_decimal_builder() {
        let cast_options = strict_cast_options();
        let decimal_type = DataType::Decimal32(9, 2);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&decimal_type, &cast_options, 2).unwrap();
        let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into();

        let null_was_value = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_was_value);
        let decimal_was_value = builder.append_value(&decimal_variant).unwrap();
        assert!(decimal_was_value);

        let finished = builder.finish().unwrap();
        let decimals = finished.as_any().downcast_ref::<Decimal32Array>().unwrap();
        assert!(decimals.is_null(0));
        assert_eq!(decimals.value(1), 1234);
    }

    /// With strict casting, `Variant::Null` still becomes a null `FixedSizeBinary(16)` slot.
    #[test]
    fn strict_cast_allows_variant_null_for_uuid_builder() {
        let cast_options = strict_cast_options();
        let uuid_type = DataType::FixedSizeBinary(16);
        let mut builder =
            make_primitive_variant_to_arrow_row_builder(&uuid_type, &cast_options, 2).unwrap();
        let uuid = Uuid::nil();

        let null_was_value = builder.append_value(&Variant::Null).unwrap();
        assert!(!null_was_value);
        let uuid_was_value = builder.append_value(&Variant::Uuid(uuid)).unwrap();
        assert!(uuid_was_value);

        let finished = builder.finish().unwrap();
        let uuids = finished
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .unwrap();
        assert!(uuids.is_null(0));
        assert_eq!(uuids.value(1), uuid.as_bytes());
    }

    /// With strict casting, `Variant::Null` becomes a null row for list and struct builders.
    #[test]
    fn strict_cast_allows_variant_null_for_list_and_struct_builders() {
        let cast_options = strict_cast_options();

        // List builder.
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
        let mut list_builder =
            make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap();
        let list_was_value = list_builder.append_value(Variant::Null).unwrap();
        assert!(!list_was_value);
        let finished_list = list_builder.finish().unwrap();
        let lists = finished_list.as_any().downcast_ref::<ListArray>().unwrap();
        assert!(lists.is_null(0));

        // Struct builder.
        let struct_type =
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)]));
        let mut struct_builder =
            make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap();
        let struct_was_value = struct_builder.append_value(Variant::Null).unwrap();
        assert!(!struct_was_value);
        let finished_struct = struct_builder.finish().unwrap();
        let structs = finished_struct
            .as_any()
            .downcast_ref::<StructArray>()
            .unwrap();
        assert!(structs.is_null(0));
    }
}