1use crate::VariantArray;
21use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
22use arrow_schema::{ArrowError, DataType, Field, Fields};
23use parquet_variant::{
24 BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
25 VariantMetadata,
26};
27use parquet_variant::{
28 ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
29};
30use std::sync::Arc;
31
/// Row-by-row builder for a [`VariantArray`].
///
/// All rows share one metadata byte buffer and one value byte buffer. For each
/// row the builder records the offset at which that row's bytes end in each
/// buffer, plus a validity bit. [`VariantArrayBuilder::build`] turns the two
/// buffers into `BinaryView` columns of a struct array.
#[derive(Debug)]
pub struct VariantArrayBuilder {
    /// Row-level validity; becomes the null buffer of the resulting struct array.
    nulls: NullBufferBuilder,
    /// Accumulates the metadata bytes for every row appended so far.
    metadata_builder: WritableMetadataBuilder,
    /// One entry per row: the metadata buffer offset at which that row ends.
    metadata_offsets: Vec<usize>,
    /// Accumulates the value bytes for every row appended so far.
    value_builder: ValueBuilder,
    /// One entry per row: the value buffer offset at which that row ends.
    value_offsets: Vec<usize>,
    /// Schema of the resulting struct array (`metadata` and `value`, both
    /// declared non-nullable `BinaryView`).
    fields: Fields,
}
103
104impl VariantArrayBuilder {
105 pub fn new(row_capacity: usize) -> Self {
106 let metadata_field = Field::new("metadata", DataType::BinaryView, false);
108 let value_field = Field::new("value", DataType::BinaryView, false);
109
110 Self {
111 nulls: NullBufferBuilder::new(row_capacity),
112 metadata_builder: WritableMetadataBuilder::default(),
113 metadata_offsets: Vec::with_capacity(row_capacity),
114 value_builder: ValueBuilder::new(),
115 value_offsets: Vec::with_capacity(row_capacity),
116 fields: Fields::from(vec![metadata_field, value_field]),
117 }
118 }
119
120 pub fn build(self) -> VariantArray {
122 let Self {
123 mut nulls,
124 metadata_builder,
125 metadata_offsets,
126 value_builder,
127 value_offsets,
128 fields,
129 } = self;
130
131 let metadata_buffer = metadata_builder.into_inner();
132 let metadata_array = binary_view_array_from_buffers(metadata_buffer, metadata_offsets);
133
134 let value_buffer = value_builder.into_inner();
135 let value_array = binary_view_array_from_buffers(value_buffer, value_offsets);
136
137 let inner = StructArray::new(
139 fields,
140 vec![
141 Arc::new(metadata_array) as ArrayRef,
142 Arc::new(value_array) as ArrayRef,
143 ],
144 nulls.finish(),
145 );
146 VariantArray::try_new(&inner).expect("valid VariantArray by construction")
149 }
150
151 pub fn append_null(&mut self) {
153 self.nulls.append_null();
154 self.metadata_offsets.push(self.metadata_builder.offset());
156 self.value_offsets.push(self.value_builder.offset());
157 }
158
159 pub fn append_nulls(&mut self, n: usize) {
161 self.nulls.append_n_nulls(n);
162 let metadata_offset = self.metadata_builder.offset();
164 let value_offset = self.value_builder.offset();
165 self.metadata_offsets
166 .extend(std::iter::repeat_n(metadata_offset, n));
167 self.value_offsets
168 .extend(std::iter::repeat_n(value_offset, n));
169 }
170
171 pub fn append_variant(&mut self, variant: Variant) {
173 ValueBuilder::append_variant(self.parent_state(), variant);
174 }
175
176 fn parent_state(&mut self) -> ParentState<'_, ArrayBuilderState<'_>> {
178 let state = ArrayBuilderState {
179 metadata_offsets: &mut self.metadata_offsets,
180 value_offsets: &mut self.value_offsets,
181 nulls: &mut self.nulls,
182 };
183
184 ParentState::new(&mut self.value_builder, &mut self.metadata_builder, state)
185 }
186}
187
188impl<'m, 'v> Extend<Option<Variant<'m, 'v>>> for VariantArrayBuilder {
189 fn extend<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(&mut self, iter: T) {
190 for v in iter {
191 match v {
192 Some(v) => self.append_variant(v),
193 None => self.append_null(),
194 }
195 }
196 }
197}
198
/// Per-row state handed to a [`ParentState`] by [`VariantArrayBuilder`].
///
/// It borrows the builder's bookkeeping so that finishing a row can append the
/// row's end offsets and mark the row as non-null.
#[derive(Debug)]
pub struct ArrayBuilderState<'a> {
    /// Receives the metadata end offset of the finished row.
    metadata_offsets: &'a mut Vec<usize>,
    /// Receives the value end offset of the finished row.
    value_offsets: &'a mut Vec<usize>,
    /// Receives the validity bit of the finished row.
    nulls: &'a mut NullBufferBuilder,
}
207
208impl BuilderSpecificState for ArrayBuilderState<'_> {
210 fn finish(
211 &mut self,
212 metadata_builder: &mut dyn MetadataBuilder,
213 value_builder: &mut ValueBuilder,
214 ) {
215 self.metadata_offsets.push(metadata_builder.finish());
216 self.value_offsets.push(value_builder.offset());
217 self.nulls.append_non_null();
218 }
219}
220
221impl VariantBuilderExt for VariantArrayBuilder {
222 type State<'a>
223 = ArrayBuilderState<'a>
224 where
225 Self: 'a;
226
227 fn append_null(&mut self) {
229 self.append_null();
230 }
231
232 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
233 self.append_variant(value.into());
234 }
235
236 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
237 Ok(ListBuilder::new(self.parent_state(), false))
238 }
239
240 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
241 Ok(ObjectBuilder::new(self.parent_state(), false))
242 }
243}
244
/// Builder for only the `value` column of a variant array.
///
/// It accumulates value bytes, per-row end offsets and row validity. Unlike
/// [`VariantArrayBuilder`] it owns no metadata; a metadata builder is supplied
/// per row through [`VariantValueArrayBuilder::parent_state`] or
/// [`VariantValueArrayBuilder::builder_ext`].
#[derive(Debug)]
pub struct VariantValueArrayBuilder {
    /// Accumulates the value bytes for every row appended so far.
    value_builder: ValueBuilder,
    /// One entry per row: the value buffer offset at which that row ends.
    value_offsets: Vec<usize>,
    /// Row-level validity of the resulting `BinaryViewArray`.
    nulls: NullBufferBuilder,
}
278
279impl VariantValueArrayBuilder {
280 pub fn new(row_capacity: usize) -> Self {
282 Self {
283 value_builder: ValueBuilder::new(),
284 value_offsets: Vec::with_capacity(row_capacity),
285 nulls: NullBufferBuilder::new(row_capacity),
286 }
287 }
288
289 pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> {
294 let value_buffer = self.value_builder.into_inner();
295 let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets);
296 if let Some(nulls) = self.nulls.finish() {
297 let (views, buffers, _) = array.into_parts();
298 array = BinaryViewArray::try_new(views, buffers, Some(nulls))?;
299 }
300 Ok(array)
301 }
302
303 pub fn append_null(&mut self) {
310 self.value_offsets.push(self.value_builder.offset());
311 self.nulls.append_null();
312 }
313
314 pub fn append_value(&mut self, value: Variant<'_, '_>) {
332 self.builder_ext(&value.metadata().clone())
334 .append_value(value);
335 }
336
337 pub fn parent_state<'a>(
362 &'a mut self,
363 metadata_builder: &'a mut dyn MetadataBuilder,
364 ) -> ParentState<'a, ValueArrayBuilderState<'a>> {
365 let state = ValueArrayBuilderState {
366 value_offsets: &mut self.value_offsets,
367 nulls: &mut self.nulls,
368 };
369
370 ParentState::new(&mut self.value_builder, metadata_builder, state)
371 }
372
373 pub fn builder_ext<'a>(
376 &'a mut self,
377 metadata: &'a VariantMetadata<'a>,
378 ) -> VariantValueArrayBuilderExt<'a> {
379 VariantValueArrayBuilderExt {
380 metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
381 value_builder: self,
382 }
383 }
384}
385
/// Per-row state handed to a [`ParentState`] by [`VariantValueArrayBuilder`].
///
/// It borrows the builder's bookkeeping so that finishing a row can append the
/// row's value end offset and mark the row as non-null.
#[derive(Debug)]
pub struct ValueArrayBuilderState<'a> {
    /// Receives the value end offset of the finished row.
    value_offsets: &'a mut Vec<usize>,
    /// Receives the validity bit of the finished row.
    nulls: &'a mut NullBufferBuilder,
}
393
394impl BuilderSpecificState for ValueArrayBuilderState<'_> {
396 fn finish(
397 &mut self,
398 _metadata_builder: &mut dyn MetadataBuilder,
399 value_builder: &mut ValueBuilder,
400 ) {
401 self.value_offsets.push(value_builder.offset());
402 self.nulls.append_non_null();
403 }
404}
405
/// A [`VariantBuilderExt`] adapter that writes rows into a
/// [`VariantValueArrayBuilder`] while resolving metadata through a
/// [`ReadOnlyMetadataBuilder`].
pub struct VariantValueArrayBuilderExt<'a> {
    /// Metadata builder passed to the value builder for every row written.
    metadata_builder: ReadOnlyMetadataBuilder<'a>,
    /// Destination builder that receives the rows.
    value_builder: &'a mut VariantValueArrayBuilder,
}
412
impl<'a> VariantValueArrayBuilderExt<'a> {
    /// Creates an adapter from an existing read-only metadata builder and the
    /// value array builder that should receive the rows.
    pub fn new(
        metadata_builder: ReadOnlyMetadataBuilder<'a>,
        value_builder: &'a mut VariantValueArrayBuilder,
    ) -> Self {
        Self {
            metadata_builder,
            value_builder,
        }
    }
}
425
426impl<'a> VariantBuilderExt for VariantValueArrayBuilderExt<'a> {
427 type State<'b>
428 = ValueArrayBuilderState<'b>
429 where
430 Self: 'b;
431
432 fn append_null(&mut self) {
433 self.value_builder.append_null()
434 }
435
436 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
437 let state = self.value_builder.parent_state(&mut self.metadata_builder);
438 ValueBuilder::append_variant_bytes(state, value.into());
439 }
440
441 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
442 let state = self.value_builder.parent_state(&mut self.metadata_builder);
443 Ok(ListBuilder::new(state, false))
444 }
445
446 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
447 let state = self.value_builder.parent_state(&mut self.metadata_builder);
448 Ok(ObjectBuilder::new(state, false))
449 }
450}
451
452fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
453 u32::try_from(buffer.len()).expect("buffer length should fit in u32");
456
457 let mut builder = BinaryViewBuilder::with_capacity(offsets.len());
458 let block = builder.append_block(buffer.into());
459 let mut start = 0;
461 for end in offsets {
462 let end = end as u32; builder
464 .try_append_view(block, start, end - start)
465 .expect("Failed to append view");
466 start = end;
467 }
468 builder.finish()
469}
470
#[cfg(test)]
mod test {
    use super::*;
    use arrow::array::Array;
    use parquet_variant::{ShortString, Variant};

    /// Null rows must not introduce nulls in the `metadata` / `value` children,
    /// and both children must be declared non-nullable.
    #[test]
    fn test_variant_array_builder_non_nullable() {
        let mut builder = VariantArrayBuilder::new(10);

        // A leading null row must be accepted without panicking.
        builder.extend([None, Some(Variant::from(42_i32))]);

        let array = builder.build();

        assert_eq!(array.len(), 2);
        assert!(array.is_null(0));
        assert!(!array.is_null(1));
        assert_eq!(array.value(1), Variant::from(42i32));

        // Neither child column carries a null buffer of its own.
        assert!(array.metadata_field().nulls().is_none());
        assert!(array.value_field().unwrap().nulls().is_none());

        let DataType::Struct(fields) = array.data_type() else {
            panic!("Expected VariantArray to have Struct data type");
        };
        for field in fields {
            assert!(
                !field.is_nullable(),
                "Field {} should be non-nullable",
                field.name()
            );
        }
    }

    /// Mixes a null, a primitive, an object and a list in one array.
    #[test]
    fn test_variant_array_builder() {
        let mut builder = VariantArrayBuilder::new(10);

        // Row 0: null (must not panic).
        builder.append_null();
        // Row 1: primitive.
        builder.append_variant(Variant::from(42i32));
        // Row 2: object built in place.
        builder.new_object().with_field("foo", "bar").finish();
        // Row 3: list built in place.
        builder
            .new_list()
            .with_value(Variant::from(1i32))
            .with_value(Variant::from(2i32))
            .finish();

        let array = builder.build();
        assert_eq!(array.len(), 4);

        assert!(array.is_null(0));

        assert!(!array.is_null(1));
        assert_eq!(array.value(1), Variant::from(42i32));

        assert!(!array.is_null(2));
        let object_row = array.value(2);
        let object = object_row.as_object().expect("variant to be an object");
        assert_eq!(object.get("foo").unwrap(), Variant::from("bar"));

        assert!(!array.is_null(3));
        let list_row = array.value(3);
        let list = list_row.as_list().expect("variant to be a list");
        assert_eq!(list.len(), 2);
    }

    /// `append_nulls(0)` adds nothing; `append_nulls(3)` adds three null rows.
    #[test]
    fn test_variant_array_builder_append_nulls() {
        let mut builder = VariantArrayBuilder::new(6);
        builder.append_variant(Variant::from(1i32));
        builder.append_nulls(0);
        builder.append_nulls(3);
        builder.append_variant(Variant::from(2i32));

        let array = builder.build();

        assert_eq!(array.len(), 5);
        assert_eq!(array.value(0), Variant::from(1i32));
        for row in 1..=3 {
            assert!(array.is_null(row));
        }
        assert_eq!(array.value(4), Variant::from(2i32));
    }

    /// `Extend` maps `None` to a null row and `Some` to a value row.
    #[test]
    fn test_extend_variant_array_builder() {
        let mut builder = VariantArrayBuilder::new(3);
        builder.extend([None, Some(Variant::Null), Some(Variant::from("norm"))]);

        let array = builder.build();

        assert_eq!(array.len(), 3);
        assert!(array.is_null(0));
        assert_eq!(array.value(1), Variant::Null);
        assert_eq!(
            array.value(2),
            Variant::ShortString(ShortString::try_new("norm").unwrap())
        );
    }

    /// Values and nulls each produce one row in the value column.
    #[test]
    fn test_variant_value_array_builder_basic() {
        let mut builder = VariantValueArrayBuilder::new(10);

        builder.append_value(Variant::from(42i32));
        builder.append_null();
        builder.append_value(Variant::from("hello"));

        let values = builder.build().unwrap();
        assert_eq!(values.len(), 3);
    }

    /// Rebuilds a value column against the metadata of an existing array.
    #[test]
    fn test_variant_value_array_builder_with_objects() {
        // Source array: three objects, the second with an extra `city` field.
        let mut source_builder = VariantArrayBuilder::new(3);
        source_builder
            .new_object()
            .with_field("name", "Alice")
            .with_field("age", 30i32)
            .finish();

        source_builder
            .new_object()
            .with_field("name", "Bob")
            .with_field("age", 42i32)
            .with_field("city", "Wonderland")
            .finish();

        source_builder
            .new_object()
            .with_field("name", "Charlie")
            .with_field("age", 1i32)
            .finish();

        let array = source_builder.build();

        let mut value_builder = VariantValueArrayBuilder::new(3);

        // Row 0: copied over unchanged.
        value_builder.append_value(array.value(0));

        // Row 1: rebuilt as an object containing only `name` and `age`.
        let second = array.value(1);
        let mut object_row = value_builder.builder_ext(second.metadata());
        object_row
            .new_object()
            .with_field("name", second.get_object_field("name").unwrap())
            .with_field("age", second.get_object_field("age").unwrap())
            .finish();

        // Row 2: a list holding the original third row twice.
        let third = array.value(2);
        let mut list_row = value_builder.builder_ext(third.metadata());
        list_row
            .new_list()
            .with_value(third.clone())
            .with_value(third.clone())
            .finish();

        // Pair the new value column with the original metadata column.
        let array2 = VariantArray::from_parts(
            array.metadata_field().clone(),
            Some(Arc::new(value_builder.build().unwrap())),
            None,
            None,
        );

        assert_eq!(array2.len(), 3);
        assert_eq!(array.value(0), array2.value(0));

        assert_eq!(
            array.value(1).get_object_field("name"),
            array2.value(1).get_object_field("name")
        );
        assert_eq!(
            array.value(1).get_object_field("age"),
            array2.value(1).get_object_field("age")
        );

        assert_eq!(array.value(2), array2.value(2).get_list_element(0).unwrap());
        assert_eq!(array.value(2), array2.value(2).get_list_element(1).unwrap());
    }
}