1use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray};
21use arrow::buffer::NullBuffer;
22use arrow::datatypes::{Int16Type, Int32Type};
23use arrow_schema::{ArrowError, DataType};
24use parquet_variant::Variant;
25use std::any::Any;
26use std::sync::Arc;
27
28use crate::type_conversion::primitive_conversion_single_value;
29
30#[derive(Debug)]
49pub struct VariantArray {
50 inner: StructArray,
52
53 shredding_state: ShreddingState,
55}
56
57impl VariantArray {
58 pub fn try_new(inner: ArrayRef) -> Result<Self, ArrowError> {
86 let Some(inner) = inner.as_struct_opt() else {
87 return Err(ArrowError::InvalidArgumentError(
88 "Invalid VariantArray: requires StructArray as input".to_string(),
89 ));
90 };
91
92 let Some(metadata_field) = inner.column_by_name("metadata") else {
96 return Err(ArrowError::InvalidArgumentError(
97 "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
98 ));
99 };
100 let Some(metadata) = metadata_field.as_binary_view_opt() else {
101 return Err(ArrowError::NotYetImplemented(format!(
102 "VariantArray 'metadata' field must be BinaryView, got {}",
103 metadata_field.data_type()
104 )));
105 };
106
107 let value = inner
109 .column_by_name("value")
110 .map(|v| {
111 v.as_binary_view_opt().ok_or_else(|| {
112 ArrowError::NotYetImplemented(format!(
113 "VariantArray 'value' field must be BinaryView, got {}",
114 v.data_type()
115 ))
116 })
117 })
118 .transpose()?;
119
120 let typed_value = inner.column_by_name("typed_value");
122
123 let inner = inner.clone();
125 let shredding_state =
126 ShreddingState::try_new(metadata.clone(), value.cloned(), typed_value.cloned())?;
127
128 Ok(Self {
129 inner,
130 shredding_state,
131 })
132 }
133
134 pub fn inner(&self) -> &StructArray {
136 &self.inner
137 }
138
139 pub fn into_inner(self) -> StructArray {
141 self.inner
142 }
143
144 pub fn shredding_state(&self) -> &ShreddingState {
146 &self.shredding_state
147 }
148
149 pub fn value(&self, index: usize) -> Variant<'_, '_> {
170 match &self.shredding_state {
171 ShreddingState::Unshredded { metadata, value } => {
172 Variant::new(metadata.value(index), value.value(index))
173 }
174 ShreddingState::Typed { typed_value, .. } => {
175 if typed_value.is_null(index) {
176 Variant::Null
177 } else {
178 typed_value_to_variant(typed_value, index)
179 }
180 }
181 ShreddingState::PartiallyShredded {
182 metadata,
183 value,
184 typed_value,
185 } => {
186 if typed_value.is_null(index) {
187 Variant::new(metadata.value(index), value.value(index))
188 } else {
189 typed_value_to_variant(typed_value, index)
190 }
191 }
192 ShreddingState::AllNull { .. } => {
193 Variant::Null
198 }
199 }
200 }
201
202 pub fn metadata_field(&self) -> &BinaryViewArray {
204 self.shredding_state.metadata_field()
205 }
206
207 pub fn value_field(&self) -> Option<&BinaryViewArray> {
209 self.shredding_state.value_field()
210 }
211
212 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
214 self.shredding_state.typed_value_field()
215 }
216}
217
218#[derive(Debug)]
237pub enum ShreddingState {
238 Unshredded {
240 metadata: BinaryViewArray,
241 value: BinaryViewArray,
242 },
243 Typed {
246 metadata: BinaryViewArray,
247 typed_value: ArrayRef,
248 },
249 PartiallyShredded {
256 metadata: BinaryViewArray,
257 value: BinaryViewArray,
258 typed_value: ArrayRef,
259 },
260 AllNull { metadata: BinaryViewArray },
267}
268
269impl ShreddingState {
270 pub fn try_new(
272 metadata: BinaryViewArray,
273 value: Option<BinaryViewArray>,
274 typed_value: Option<ArrayRef>,
275 ) -> Result<Self, ArrowError> {
276 match (metadata, value, typed_value) {
277 (metadata, Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded {
278 metadata,
279 value,
280 typed_value,
281 }),
282 (metadata, Some(value), None) => Ok(Self::Unshredded { metadata, value }),
283 (metadata, None, Some(typed_value)) => Ok(Self::Typed {
284 metadata,
285 typed_value,
286 }),
287 (metadata, None, None) => Ok(Self::AllNull { metadata }),
288 }
289 }
290
291 pub fn metadata_field(&self) -> &BinaryViewArray {
293 match self {
294 ShreddingState::Unshredded { metadata, .. } => metadata,
295 ShreddingState::Typed { metadata, .. } => metadata,
296 ShreddingState::PartiallyShredded { metadata, .. } => metadata,
297 ShreddingState::AllNull { metadata } => metadata,
298 }
299 }
300
301 pub fn value_field(&self) -> Option<&BinaryViewArray> {
303 match self {
304 ShreddingState::Unshredded { value, .. } => Some(value),
305 ShreddingState::Typed { .. } => None,
306 ShreddingState::PartiallyShredded { value, .. } => Some(value),
307 ShreddingState::AllNull { .. } => None,
308 }
309 }
310
311 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
313 match self {
314 ShreddingState::Unshredded { .. } => None,
315 ShreddingState::Typed { typed_value, .. } => Some(typed_value),
316 ShreddingState::PartiallyShredded { typed_value, .. } => Some(typed_value),
317 ShreddingState::AllNull { .. } => None,
318 }
319 }
320
321 pub fn slice(&self, offset: usize, length: usize) -> Self {
323 match self {
324 ShreddingState::Unshredded { metadata, value } => ShreddingState::Unshredded {
325 metadata: metadata.slice(offset, length),
326 value: value.slice(offset, length),
327 },
328 ShreddingState::Typed {
329 metadata,
330 typed_value,
331 } => ShreddingState::Typed {
332 metadata: metadata.slice(offset, length),
333 typed_value: typed_value.slice(offset, length),
334 },
335 ShreddingState::PartiallyShredded {
336 metadata,
337 value,
338 typed_value,
339 } => ShreddingState::PartiallyShredded {
340 metadata: metadata.slice(offset, length),
341 value: value.slice(offset, length),
342 typed_value: typed_value.slice(offset, length),
343 },
344 ShreddingState::AllNull { metadata } => ShreddingState::AllNull {
345 metadata: metadata.slice(offset, length),
346 },
347 }
348 }
349}
350
351fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '_> {
353 match typed_value.data_type() {
354 DataType::Int32 => {
355 primitive_conversion_single_value!(Int32Type, typed_value, index)
356 }
357 DataType::Int16 => {
358 primitive_conversion_single_value!(Int16Type, typed_value, index)
359 }
360 _ => {
363 debug_assert!(
367 false,
368 "Unsupported typed_value type: {:?}",
369 typed_value.data_type()
370 );
371 Variant::Null
372 }
373 }
374}
375
376impl Array for VariantArray {
377 fn as_any(&self) -> &dyn Any {
378 self
379 }
380
381 fn to_data(&self) -> ArrayData {
382 self.inner.to_data()
383 }
384
385 fn into_data(self) -> ArrayData {
386 self.inner.into_data()
387 }
388
389 fn data_type(&self) -> &DataType {
390 self.inner.data_type()
391 }
392
393 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
394 let inner = self.inner.slice(offset, length);
395 let shredding_state = self.shredding_state.slice(offset, length);
396 Arc::new(Self {
397 inner,
398 shredding_state,
399 })
400 }
401
402 fn len(&self) -> usize {
403 self.inner.len()
404 }
405
406 fn is_empty(&self) -> bool {
407 self.inner.is_empty()
408 }
409
410 fn offset(&self) -> usize {
411 self.inner.offset()
412 }
413
414 fn nulls(&self) -> Option<&NullBuffer> {
415 self.inner.nulls()
416 }
417
418 fn get_buffer_memory_size(&self) -> usize {
419 self.inner.get_buffer_memory_size()
420 }
421
422 fn get_array_memory_size(&self) -> usize {
423 self.inner.get_array_memory_size()
424 }
425}
426
#[cfg(test)]
mod test {
    use super::*;
    use arrow::array::{BinaryArray, BinaryViewArray};
    use arrow_schema::{Field, Fields};

    /// One-element `BinaryView` array, usable as a correctly typed column.
    fn make_binary_view_array() -> ArrayRef {
        Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]]))
    }

    /// One-element `Binary` array, used as a wrongly typed column.
    fn make_binary_array() -> ArrayRef {
        Arc::new(BinaryArray::from(vec![b"test" as &[u8]]))
    }

    /// Asserts that `VariantArray::try_new` rejects `input` with exactly
    /// the `expected` error text.
    fn assert_rejected(input: ArrayRef, expected: &str) {
        let error = VariantArray::try_new(input).unwrap_err();
        assert_eq!(error.to_string(), expected);
    }

    #[test]
    fn invalid_not_a_struct_array() {
        assert_rejected(
            make_binary_view_array(),
            "Invalid argument error: Invalid VariantArray: requires StructArray as input",
        );
    }

    #[test]
    fn invalid_missing_metadata() {
        let schema = Fields::from(vec![Field::new("value", DataType::BinaryView, true)]);
        let input = StructArray::new(schema, vec![make_binary_view_array()], None);
        assert_rejected(
            Arc::new(input),
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field",
        );
    }

    #[test]
    fn all_null_missing_value_and_typed_value() {
        let schema = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let input = StructArray::new(schema, vec![make_binary_view_array()], None);

        // A struct with only `metadata` is accepted and classified as AllNull.
        let variant_array = VariantArray::try_new(Arc::new(input)).unwrap();
        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState::AllNull { .. }
        ));

        // Every valid slot must read back as a Variant null.
        (0..variant_array.len())
            .filter(|&i| variant_array.is_valid(i))
            .for_each(|i| assert_eq!(variant_array.value(i), Variant::Null));
    }

    #[test]
    fn invalid_metadata_field_type() {
        let schema = Fields::from(vec![
            Field::new("metadata", DataType::Binary, true),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let columns = vec![make_binary_array(), make_binary_view_array()];
        let input = StructArray::new(schema, columns, None);
        assert_rejected(
            Arc::new(input),
            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Binary",
        );
    }

    #[test]
    fn invalid_value_field_type() {
        let schema = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, true),
            Field::new("value", DataType::Binary, true),
        ]);
        let columns = vec![make_binary_view_array(), make_binary_array()];
        let input = StructArray::new(schema, columns, None);
        assert_rejected(
            Arc::new(input),
            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Binary",
        );
    }

    #[test]
    fn all_null_shredding_state() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]]);
        let state = ShreddingState::try_new(metadata.clone(), None, None).unwrap();

        // With neither optional column, the state must be AllNull and must
        // carry the metadata through unchanged.
        let ShreddingState::AllNull { metadata: kept } = state else {
            panic!("expected ShreddingState::AllNull");
        };
        assert_eq!(kept.len(), metadata.len());
        assert_eq!(kept.value(0), metadata.value(0));
    }

    #[test]
    fn all_null_variant_array_construction() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
        // Mark every row of the struct as null.
        let struct_nulls = NullBuffer::from(vec![false; 3]);
        let schema = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let input = StructArray::new(schema, vec![Arc::new(metadata)], Some(struct_nulls));

        let variant_array = VariantArray::try_new(Arc::new(input)).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState::AllNull { .. }
        ));

        assert_eq!(variant_array.len(), 3);
        for i in [0, 1, 2] {
            assert!(!variant_array.is_valid(i));
        }

        (0..variant_array.len()).for_each(|i| {
            assert!(
                !variant_array.is_valid(i),
                "Expected value at index {i} to be null"
            );
        });
    }

    #[test]
    fn value_field_present_but_all_null_should_be_unshredded() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);

        // Build a `value` column that exists but whose every slot is null.
        let all_null = NullBuffer::from(vec![false; 3]);
        let value_data = BinaryViewArray::from_iter_values(vec![""; 3])
            .to_data()
            .into_builder()
            .nulls(Some(all_null))
            .build()
            .unwrap();
        let value = BinaryViewArray::from(value_data);

        let schema = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, false),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let input = StructArray::new(schema, vec![Arc::new(metadata), Arc::new(value)], None);

        let variant_array = VariantArray::try_new(Arc::new(input)).unwrap();

        // Classification depends on the column being present, not on its contents.
        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState::Unshredded { .. }
        ));
    }
}