1use crate::VariantArray;
21use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
22use arrow_schema::{ArrowError, DataType, Field, Fields};
23use parquet_variant::{
24 BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
25 VariantMetadata,
26};
27use parquet_variant::{
28 ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
29};
30use std::sync::Arc;
31
32#[derive(Debug)]
94pub struct VariantArrayBuilder {
95 nulls: NullBufferBuilder,
97 metadata_builder: WritableMetadataBuilder,
99 metadata_offsets: Vec<usize>,
101 value_builder: ValueBuilder,
103 value_offsets: Vec<usize>,
105 fields: Fields,
110}
111
112impl VariantArrayBuilder {
113 pub fn new(row_capacity: usize) -> Self {
114 let metadata_field = Field::new("metadata", DataType::BinaryView, false);
116 let value_field = Field::new("value", DataType::BinaryView, false);
117
118 Self {
119 nulls: NullBufferBuilder::new(row_capacity),
120 metadata_builder: WritableMetadataBuilder::default(),
121 metadata_offsets: Vec::with_capacity(row_capacity),
122 value_builder: ValueBuilder::new(),
123 value_offsets: Vec::with_capacity(row_capacity),
124 fields: Fields::from(vec![metadata_field, value_field]),
125 }
126 }
127
128 pub fn build(self) -> VariantArray {
130 let Self {
131 mut nulls,
132 metadata_builder,
133 metadata_offsets,
134 value_builder,
135 value_offsets,
136 fields,
137 } = self;
138
139 let metadata_buffer = metadata_builder.into_inner();
140 let metadata_array = binary_view_array_from_buffers(metadata_buffer, metadata_offsets);
141
142 let value_buffer = value_builder.into_inner();
143 let value_array = binary_view_array_from_buffers(value_buffer, value_offsets);
144
145 let inner = StructArray::new(
147 fields,
148 vec![
149 Arc::new(metadata_array) as ArrayRef,
150 Arc::new(value_array) as ArrayRef,
151 ],
152 nulls.finish(),
153 );
154 VariantArray::try_new(&inner).expect("valid VariantArray by construction")
157 }
158
159 pub fn append_null(&mut self) {
161 self.nulls.append_null();
162 self.metadata_offsets.push(self.metadata_builder.offset());
164 self.value_offsets.push(self.value_builder.offset());
165 }
166
167 pub fn append_nulls(&mut self, n: usize) {
169 self.nulls.append_n_nulls(n);
170 let metadata_offset = self.metadata_builder.offset();
172 let value_offset = self.value_builder.offset();
173 self.metadata_offsets
174 .extend(std::iter::repeat_n(metadata_offset, n));
175 self.value_offsets
176 .extend(std::iter::repeat_n(value_offset, n));
177 }
178
179 pub fn append_variant(&mut self, variant: Variant) {
181 ValueBuilder::append_variant(self.parent_state(), variant);
182 }
183
184 fn parent_state(&mut self) -> ParentState<'_, ArrayBuilderState<'_>> {
186 let state = ArrayBuilderState {
187 metadata_offsets: &mut self.metadata_offsets,
188 value_offsets: &mut self.value_offsets,
189 nulls: &mut self.nulls,
190 };
191
192 ParentState::new(&mut self.value_builder, &mut self.metadata_builder, state)
193 }
194}
195
196impl<'m, 'v> Extend<Option<Variant<'m, 'v>>> for VariantArrayBuilder {
197 fn extend<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(&mut self, iter: T) {
198 for v in iter {
199 match v {
200 Some(v) => self.append_variant(v),
201 None => self.append_null(),
202 }
203 }
204 }
205}
206
207#[derive(Debug)]
210pub struct ArrayBuilderState<'a> {
211 metadata_offsets: &'a mut Vec<usize>,
212 value_offsets: &'a mut Vec<usize>,
213 nulls: &'a mut NullBufferBuilder,
214}
215
216impl BuilderSpecificState for ArrayBuilderState<'_> {
218 fn finish(
219 &mut self,
220 metadata_builder: &mut dyn MetadataBuilder,
221 value_builder: &mut ValueBuilder,
222 ) {
223 self.metadata_offsets.push(metadata_builder.finish());
224 self.value_offsets.push(value_builder.offset());
225 self.nulls.append_non_null();
226 }
227}
228
229impl VariantBuilderExt for VariantArrayBuilder {
230 type State<'a>
231 = ArrayBuilderState<'a>
232 where
233 Self: 'a;
234
235 fn append_null(&mut self) {
237 self.append_null();
238 }
239
240 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
241 self.append_variant(value.into());
242 }
243
244 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
245 Ok(ListBuilder::new(self.parent_state(), false))
246 }
247
248 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
249 Ok(ObjectBuilder::new(self.parent_state(), false))
250 }
251}
252
253#[derive(Debug)]
281pub struct VariantValueArrayBuilder {
282 value_builder: ValueBuilder,
283 value_offsets: Vec<usize>,
284 nulls: NullBufferBuilder,
285}
286
287impl VariantValueArrayBuilder {
288 pub fn new(row_capacity: usize) -> Self {
290 Self {
291 value_builder: ValueBuilder::new(),
292 value_offsets: Vec::with_capacity(row_capacity),
293 nulls: NullBufferBuilder::new(row_capacity),
294 }
295 }
296
297 pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> {
302 let value_buffer = self.value_builder.into_inner();
303 let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets);
304 if let Some(nulls) = self.nulls.finish() {
305 let (views, buffers, _) = array.into_parts();
306 array = BinaryViewArray::try_new(views, buffers, Some(nulls))?;
307 }
308 Ok(array)
309 }
310
311 pub fn append_null(&mut self) {
318 self.value_offsets.push(self.value_builder.offset());
319 self.nulls.append_null();
320 }
321
322 pub fn append_value(&mut self, value: Variant<'_, '_>) {
340 self.builder_ext(&value.metadata().clone())
342 .append_value(value);
343 }
344
345 pub fn parent_state<'a>(
370 &'a mut self,
371 metadata_builder: &'a mut dyn MetadataBuilder,
372 ) -> ParentState<'a, ValueArrayBuilderState<'a>> {
373 let state = ValueArrayBuilderState {
374 value_offsets: &mut self.value_offsets,
375 nulls: &mut self.nulls,
376 };
377
378 ParentState::new(&mut self.value_builder, metadata_builder, state)
379 }
380
381 pub fn builder_ext<'a>(
384 &'a mut self,
385 metadata: &'a VariantMetadata<'a>,
386 ) -> VariantValueArrayBuilderExt<'a> {
387 VariantValueArrayBuilderExt {
388 metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
389 value_builder: self,
390 }
391 }
392}
393
394#[derive(Debug)]
397pub struct ValueArrayBuilderState<'a> {
398 value_offsets: &'a mut Vec<usize>,
399 nulls: &'a mut NullBufferBuilder,
400}
401
402impl BuilderSpecificState for ValueArrayBuilderState<'_> {
404 fn finish(
405 &mut self,
406 _metadata_builder: &mut dyn MetadataBuilder,
407 value_builder: &mut ValueBuilder,
408 ) {
409 self.value_offsets.push(value_builder.offset());
410 self.nulls.append_non_null();
411 }
412}
413
414pub struct VariantValueArrayBuilderExt<'a> {
417 metadata_builder: ReadOnlyMetadataBuilder<'a>,
418 value_builder: &'a mut VariantValueArrayBuilder,
419}
420
421impl<'a> VariantValueArrayBuilderExt<'a> {
422 pub fn new(
424 metadata_builder: ReadOnlyMetadataBuilder<'a>,
425 value_builder: &'a mut VariantValueArrayBuilder,
426 ) -> Self {
427 Self {
428 metadata_builder,
429 value_builder,
430 }
431 }
432}
433
434impl<'a> VariantBuilderExt for VariantValueArrayBuilderExt<'a> {
435 type State<'b>
436 = ValueArrayBuilderState<'b>
437 where
438 Self: 'b;
439
440 fn append_null(&mut self) {
441 self.value_builder.append_null()
442 }
443
444 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
445 let state = self.value_builder.parent_state(&mut self.metadata_builder);
446 ValueBuilder::append_variant_bytes(state, value.into());
447 }
448
449 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
450 let state = self.value_builder.parent_state(&mut self.metadata_builder);
451 Ok(ListBuilder::new(state, false))
452 }
453
454 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
455 let state = self.value_builder.parent_state(&mut self.metadata_builder);
456 Ok(ObjectBuilder::new(state, false))
457 }
458}
459
460fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
461 u32::try_from(buffer.len()).expect("buffer length should fit in u32");
464
465 let mut builder = BinaryViewBuilder::with_capacity(offsets.len());
466 let block = builder.append_block(buffer.into());
467 let mut start = 0;
469 for end in offsets {
470 let end = end as u32; builder
472 .try_append_view(block, start, end - start)
473 .expect("Failed to append view");
474 start = end;
475 }
476 builder.finish()
477}
478
#[cfg(test)]
mod test {
    use super::*;
    use arrow::array::Array;
    use parquet_variant::{ShortString, Variant};

    /// The child columns of a built array stay non-nullable even when the array has null rows.
    #[test]
    fn test_variant_array_builder_non_nullable() {
        let mut builder = VariantArrayBuilder::new(10);
        builder.extend([None, Some(Variant::from(42_i32))]);

        let built = builder.build();

        assert_eq!(built.len(), 2);
        assert!(built.is_null(0));
        assert!(!built.is_null(1));
        assert_eq!(built.value(1), Variant::from(42i32));

        // Neither child column carries a null buffer of its own.
        assert!(built.metadata_column().nulls().is_none());
        assert!(built.value_column().unwrap().nulls().is_none());

        let DataType::Struct(fields) = built.data_type() else {
            panic!("Expected VariantArray to have Struct data type");
        };
        for field in fields {
            assert!(
                !field.is_nullable(),
                "Field {} should be non-nullable",
                field.name()
            );
        }
    }

    /// Null, primitive, object and list rows can be mixed in one array.
    #[test]
    fn test_variant_array_builder() {
        let mut builder = VariantArrayBuilder::new(10);
        builder.append_null();
        builder.append_variant(Variant::from(42i32));
        builder.new_object().with_field("foo", "bar").finish();
        builder
            .new_list()
            .with_value(Variant::from(1i32))
            .with_value(Variant::from(2i32))
            .finish();

        let rows = builder.build();

        assert_eq!(rows.len(), 4);
        assert!(rows.is_null(0));
        for index in 1..4 {
            assert!(!rows.is_null(index));
        }

        assert_eq!(rows.value(1), Variant::from(42i32));

        let object_row = rows.value(2);
        let object = object_row.as_object().expect("variant to be an object");
        assert_eq!(object.get("foo").unwrap(), Variant::from("bar"));

        let list_row = rows.value(3);
        let list = list_row.as_list().expect("variant to be a list");
        assert_eq!(list.len(), 2);
    }

    /// `append_nulls(0)` adds nothing and `append_nulls(3)` adds exactly three null rows.
    #[test]
    fn test_variant_array_builder_append_nulls() {
        let mut builder = VariantArrayBuilder::new(6);
        builder.append_variant(Variant::from(1i32));
        builder.append_nulls(0);
        builder.append_nulls(3);
        builder.append_variant(Variant::from(2i32));

        let rows = builder.build();

        assert_eq!(rows.len(), 5);
        assert_eq!(rows.value(0), Variant::from(1i32));
        for index in 1..=3 {
            assert!(rows.is_null(index));
        }
        assert_eq!(rows.value(4), Variant::from(2i32));
    }

    /// `Extend` maps `None` to a null row and `Some(v)` to a value row.
    #[test]
    fn test_extend_variant_array_builder() {
        let mut builder = VariantArrayBuilder::new(3);
        builder.extend([None, Some(Variant::Null), Some(Variant::from("norm"))]);

        let rows = builder.build();

        assert_eq!(rows.len(), 3);
        assert!(rows.is_null(0));
        assert_eq!(rows.value(1), Variant::Null);

        let expected = Variant::ShortString(ShortString::try_new("norm").unwrap());
        assert_eq!(rows.value(2), expected);
    }

    /// Values and nulls appended to a value-only builder each produce one row.
    #[test]
    fn test_variant_value_array_builder_basic() {
        let mut builder = VariantValueArrayBuilder::new(10);
        builder.append_value(Variant::from(42i32));
        builder.append_null();
        builder.append_value(Variant::from("hello"));

        let values = builder.build().unwrap();

        assert_eq!(values.len(), 3);
    }

    /// A value column rebuilt against an existing metadata column stays readable.
    #[test]
    fn test_variant_value_array_builder_with_objects() {
        // Source array: three objects.
        let mut source = VariantArrayBuilder::new(3);
        source
            .new_object()
            .with_field("name", "Alice")
            .with_field("age", 30i32)
            .finish();
        source
            .new_object()
            .with_field("name", "Bob")
            .with_field("age", 42i32)
            .with_field("city", "Wonderland")
            .finish();
        source
            .new_object()
            .with_field("name", "Charlie")
            .with_field("age", 1i32)
            .finish();
        let array = source.build();

        let mut value_builder = VariantValueArrayBuilder::new(3);

        // Row 0: copied as-is.
        value_builder.append_value(array.value(0));

        // Row 1: a new object holding only the "name" and "age" fields of the source row.
        let second = array.value(1);
        let mut object_ext = value_builder.builder_ext(second.metadata());
        object_ext
            .new_object()
            .with_field("name", second.get_object_field("name").unwrap())
            .with_field("age", second.get_object_field("age").unwrap())
            .finish();

        // Row 2: a list containing the source row twice.
        let third = array.value(2);
        let mut list_ext = value_builder.builder_ext(third.metadata());
        list_ext
            .new_list()
            .with_value(third.clone())
            .with_value(third.clone())
            .finish();

        // Pair the original metadata column with the freshly built value column.
        let array2 = VariantArray::from_parts(
            array.metadata_column().clone(),
            Some(Arc::new(value_builder.build().unwrap())),
            None,
            None,
        );

        assert_eq!(array2.len(), 3);
        assert_eq!(array.value(0), array2.value(0));

        for key in ["name", "age"] {
            assert_eq!(
                array.value(1).get_object_field(key),
                array2.value(1).get_object_field(key)
            );
        }

        for index in 0..2 {
            assert_eq!(
                array.value(2),
                array2.value(2).get_list_element(index).unwrap()
            );
        }
    }
}