1use crate::VariantArray;
21use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
22use arrow_schema::{ArrowError, DataType, Field, Fields};
23use parquet_variant::{
24 BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
25 VariantMetadata,
26};
27use parquet_variant::{
28 ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
29};
30use std::sync::Arc;
31
/// Incrementally assembles a [`VariantArray`], one row per appended variant or null.
///
/// The metadata bytes and value bytes of all rows are accumulated by `metadata_builder` and
/// `value_builder`. After each row the current end offset of both is recorded, so that `build`
/// can slice the two byte buffers into per-row binary views.
#[derive(Debug)]
pub struct VariantArrayBuilder {
    /// Row-level validity of the resulting struct array; one entry is appended per row.
    nulls: NullBufferBuilder,
    /// Accumulates the metadata bytes of every row.
    metadata_builder: WritableMetadataBuilder,
    /// End offset into the metadata bytes recorded after each row.
    metadata_offsets: Vec<usize>,
    /// Accumulates the value bytes of every row.
    value_builder: ValueBuilder,
    /// End offset into the value bytes recorded after each row.
    value_offsets: Vec<usize>,
    /// Child fields of the resulting struct array (`metadata` and `value`, set up in `new`).
    fields: Fields,
}
103
104impl VariantArrayBuilder {
105 pub fn new(row_capacity: usize) -> Self {
106 let metadata_field = Field::new("metadata", DataType::BinaryView, false);
108 let value_field = Field::new("value", DataType::BinaryView, false);
109
110 Self {
111 nulls: NullBufferBuilder::new(row_capacity),
112 metadata_builder: WritableMetadataBuilder::default(),
113 metadata_offsets: Vec::with_capacity(row_capacity),
114 value_builder: ValueBuilder::new(),
115 value_offsets: Vec::with_capacity(row_capacity),
116 fields: Fields::from(vec![metadata_field, value_field]),
117 }
118 }
119
120 pub fn build(self) -> VariantArray {
122 let Self {
123 mut nulls,
124 metadata_builder,
125 metadata_offsets,
126 value_builder,
127 value_offsets,
128 fields,
129 } = self;
130
131 let metadata_buffer = metadata_builder.into_inner();
132 let metadata_array = binary_view_array_from_buffers(metadata_buffer, metadata_offsets);
133
134 let value_buffer = value_builder.into_inner();
135 let value_array = binary_view_array_from_buffers(value_buffer, value_offsets);
136
137 let inner = StructArray::new(
139 fields,
140 vec![
141 Arc::new(metadata_array) as ArrayRef,
142 Arc::new(value_array) as ArrayRef,
143 ],
144 nulls.finish(),
145 );
146 VariantArray::try_new(&inner).expect("valid VariantArray by construction")
149 }
150
151 pub fn append_null(&mut self) {
153 self.nulls.append_null();
154 self.metadata_offsets.push(self.metadata_builder.offset());
156 self.value_offsets.push(self.value_builder.offset());
157 }
158
159 pub fn append_variant(&mut self, variant: Variant) {
161 ValueBuilder::append_variant(self.parent_state(), variant);
162 }
163
164 fn parent_state(&mut self) -> ParentState<'_, ArrayBuilderState<'_>> {
166 let state = ArrayBuilderState {
167 metadata_offsets: &mut self.metadata_offsets,
168 value_offsets: &mut self.value_offsets,
169 nulls: &mut self.nulls,
170 };
171
172 ParentState::new(&mut self.value_builder, &mut self.metadata_builder, state)
173 }
174}
175
176impl<'m, 'v> Extend<Option<Variant<'m, 'v>>> for VariantArrayBuilder {
177 fn extend<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(&mut self, iter: T) {
178 for v in iter {
179 match v {
180 Some(v) => self.append_variant(v),
181 None => self.append_null(),
182 }
183 }
184 }
185}
186
/// Builder-specific state handed to a `ParentState` by [`VariantArrayBuilder`].
///
/// It borrows the array builder's per-row bookkeeping so that finishing a row can record the
/// row's end offsets and mark it as non-null.
#[derive(Debug)]
pub struct ArrayBuilderState<'a> {
    /// Per-row end offsets into the metadata bytes.
    metadata_offsets: &'a mut Vec<usize>,
    /// Per-row end offsets into the value bytes.
    value_offsets: &'a mut Vec<usize>,
    /// Row-level validity of the array being built.
    nulls: &'a mut NullBufferBuilder,
}
195
196impl BuilderSpecificState for ArrayBuilderState<'_> {
198 fn finish(
199 &mut self,
200 metadata_builder: &mut dyn MetadataBuilder,
201 value_builder: &mut ValueBuilder,
202 ) {
203 self.metadata_offsets.push(metadata_builder.finish());
204 self.value_offsets.push(value_builder.offset());
205 self.nulls.append_non_null();
206 }
207}
208
209impl VariantBuilderExt for VariantArrayBuilder {
210 type State<'a>
211 = ArrayBuilderState<'a>
212 where
213 Self: 'a;
214
215 fn append_null(&mut self) {
217 self.append_null();
218 }
219
220 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
221 self.append_variant(value.into());
222 }
223
224 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
225 Ok(ListBuilder::new(self.parent_state(), false))
226 }
227
228 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
229 Ok(ObjectBuilder::new(self.parent_state(), false))
230 }
231}
232
/// Builds only the value column of a variant array, as a [`BinaryViewArray`].
///
/// Unlike [`VariantArrayBuilder`], this builder owns no metadata builder: the metadata to use
/// is supplied by the caller for each appended value (see `parent_state` and `builder_ext`).
#[derive(Debug)]
pub struct VariantValueArrayBuilder {
    /// Accumulates the value bytes of every row.
    value_builder: ValueBuilder,
    /// End offset into the value bytes recorded after each row.
    value_offsets: Vec<usize>,
    /// Row-level validity; attached to the resulting array by `build` if any row is null.
    nulls: NullBufferBuilder,
}
266
267impl VariantValueArrayBuilder {
268 pub fn new(row_capacity: usize) -> Self {
270 Self {
271 value_builder: ValueBuilder::new(),
272 value_offsets: Vec::with_capacity(row_capacity),
273 nulls: NullBufferBuilder::new(row_capacity),
274 }
275 }
276
277 pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> {
282 let value_buffer = self.value_builder.into_inner();
283 let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets);
284 if let Some(nulls) = self.nulls.finish() {
285 let (views, buffers, _) = array.into_parts();
286 array = BinaryViewArray::try_new(views, buffers, Some(nulls))?;
287 }
288 Ok(array)
289 }
290
291 pub fn append_null(&mut self) {
298 self.value_offsets.push(self.value_builder.offset());
299 self.nulls.append_null();
300 }
301
302 pub fn append_value(&mut self, value: Variant<'_, '_>) {
320 self.builder_ext(&value.metadata().clone())
322 .append_value(value);
323 }
324
325 pub fn parent_state<'a>(
350 &'a mut self,
351 metadata_builder: &'a mut dyn MetadataBuilder,
352 ) -> ParentState<'a, ValueArrayBuilderState<'a>> {
353 let state = ValueArrayBuilderState {
354 value_offsets: &mut self.value_offsets,
355 nulls: &mut self.nulls,
356 };
357
358 ParentState::new(&mut self.value_builder, metadata_builder, state)
359 }
360
361 pub fn builder_ext<'a>(
364 &'a mut self,
365 metadata: &'a VariantMetadata<'a>,
366 ) -> VariantValueArrayBuilderExt<'a> {
367 VariantValueArrayBuilderExt {
368 metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
369 value_builder: self,
370 }
371 }
372}
373
/// Builder-specific state handed to a `ParentState` by [`VariantValueArrayBuilder`].
///
/// It borrows the value builder's per-row bookkeeping so that finishing a row can record the
/// row's end offset and mark it as non-null.
#[derive(Debug)]
pub struct ValueArrayBuilderState<'a> {
    /// Per-row end offsets into the value bytes.
    value_offsets: &'a mut Vec<usize>,
    /// Row-level validity of the value column being built.
    nulls: &'a mut NullBufferBuilder,
}
381
382impl BuilderSpecificState for ValueArrayBuilderState<'_> {
384 fn finish(
385 &mut self,
386 _metadata_builder: &mut dyn MetadataBuilder,
387 value_builder: &mut ValueBuilder,
388 ) {
389 self.value_offsets.push(value_builder.offset());
390 self.nulls.append_non_null();
391 }
392}
393
/// Pairs a [`VariantValueArrayBuilder`] with a [`ReadOnlyMetadataBuilder`] so that rows can be
/// appended to the value builder through the [`VariantBuilderExt`] interface.
pub struct VariantValueArrayBuilderExt<'a> {
    /// Metadata builder passed along whenever a row is appended.
    metadata_builder: ReadOnlyMetadataBuilder<'a>,
    /// The value builder that receives the appended rows.
    value_builder: &'a mut VariantValueArrayBuilder,
}
400
401impl<'a> VariantValueArrayBuilderExt<'a> {
402 pub fn new(
404 metadata_builder: ReadOnlyMetadataBuilder<'a>,
405 value_builder: &'a mut VariantValueArrayBuilder,
406 ) -> Self {
407 Self {
408 metadata_builder,
409 value_builder,
410 }
411 }
412}
413
414impl<'a> VariantBuilderExt for VariantValueArrayBuilderExt<'a> {
415 type State<'b>
416 = ValueArrayBuilderState<'b>
417 where
418 Self: 'b;
419
420 fn append_null(&mut self) {
421 self.value_builder.append_null()
422 }
423
424 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
425 let state = self.value_builder.parent_state(&mut self.metadata_builder);
426 ValueBuilder::append_variant_bytes(state, value.into());
427 }
428
429 fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
430 let state = self.value_builder.parent_state(&mut self.metadata_builder);
431 Ok(ListBuilder::new(state, false))
432 }
433
434 fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
435 let state = self.value_builder.parent_state(&mut self.metadata_builder);
436 Ok(ObjectBuilder::new(state, false))
437 }
438}
439
440fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
441 u32::try_from(buffer.len()).expect("buffer length should fit in u32");
444
445 let mut builder = BinaryViewBuilder::with_capacity(offsets.len());
446 let block = builder.append_block(buffer.into());
447 let mut start = 0;
449 for end in offsets {
450 let end = end as u32; builder
452 .try_append_view(block, start, end - start)
453 .expect("Failed to append view");
454 start = end;
455 }
456 builder.finish()
457}
458
#[cfg(test)]
mod test {
    use super::*;
    use arrow::array::Array;
    use parquet_variant::{ShortString, Variant};

    /// A null row must not panic, and neither child column may carry nulls of its own.
    #[test]
    fn test_variant_array_builder_non_nullable() {
        let mut array_builder = VariantArrayBuilder::new(10);

        let rows = [None, Some(Variant::from(42_i32))];
        array_builder.extend(rows);

        let built = array_builder.build();

        assert_eq!(built.len(), 2);
        assert!(built.is_null(0));
        assert!(!built.is_null(1));
        assert_eq!(built.value(1), Variant::from(42i32));

        // Nullness is tracked on the struct, not on the child columns.
        assert!(built.metadata_field().nulls().is_none());
        assert!(built.value_field().unwrap().nulls().is_none());

        match built.data_type() {
            DataType::Struct(fields) => {
                for field in fields {
                    assert!(
                        !field.is_nullable(),
                        "Field {} should be non-nullable",
                        field.name()
                    );
                }
            }
            _ => panic!("Expected VariantArray to have Struct data type"),
        }
    }

    /// Exercises every way of appending a row: null, primitive, object and list.
    #[test]
    fn test_variant_array_builder() {
        let mut array_builder = VariantArrayBuilder::new(10);

        // Row 0: null, row 1: primitive.
        array_builder.append_null();
        array_builder.append_variant(Variant::from(42i32));

        // Row 2: object.
        array_builder
            .new_object()
            .with_field("foo", "bar")
            .finish();

        // Row 3: list.
        array_builder
            .new_list()
            .with_value(Variant::from(1i32))
            .with_value(Variant::from(2i32))
            .finish();

        let built = array_builder.build();
        assert_eq!(built.len(), 4);

        assert!(built.is_null(0));

        assert!(!built.is_null(1));
        assert_eq!(built.value(1), Variant::from(42i32));

        assert!(!built.is_null(2));
        let object_row = built.value(2);
        let object = object_row.as_object().expect("variant to be an object");
        assert_eq!(object.get("foo").unwrap(), Variant::from("bar"));

        assert!(!built.is_null(3));
        let list_row = built.value(3);
        let list = list_row.as_list().expect("variant to be a list");
        assert_eq!(list.len(), 2);
    }

    /// `Extend` distinguishes an array-level null (`None`) from a variant null value.
    #[test]
    fn test_extend_variant_array_builder() {
        let mut array_builder = VariantArrayBuilder::new(3);
        array_builder.extend([None, Some(Variant::Null), Some(Variant::from("norm"))]);

        let built = array_builder.build();

        assert_eq!(built.len(), 3);
        assert!(built.is_null(0));
        assert_eq!(built.value(1), Variant::Null);

        let expected = Variant::ShortString(ShortString::try_new("norm").unwrap());
        assert_eq!(built.value(2), expected);
    }

    /// Values and nulls can be interleaved in the value-only builder.
    #[test]
    fn test_variant_value_array_builder_basic() {
        let mut values = VariantValueArrayBuilder::new(10);

        values.append_value(Variant::from(42i32));
        values.append_null();
        values.append_value(Variant::from("hello"));

        let built = values.build().unwrap();
        assert_eq!(built.len(), 3);
    }

    /// Rebuilds the value column of an existing array while reusing its metadata column.
    #[test]
    fn test_variant_value_array_builder_with_objects() {
        // Source array: three objects.
        let mut source_builder = VariantArrayBuilder::new(3);
        source_builder
            .new_object()
            .with_field("name", "Alice")
            .with_field("age", 30i32)
            .finish();
        source_builder
            .new_object()
            .with_field("name", "Bob")
            .with_field("age", 42i32)
            .with_field("city", "Wonderland")
            .finish();
        source_builder
            .new_object()
            .with_field("name", "Charlie")
            .with_field("age", 1i32)
            .finish();
        let source = source_builder.build();

        let mut values = VariantValueArrayBuilder::new(3);

        // Row 0: appended unchanged.
        values.append_value(source.value(0));

        // Row 1: a new object that keeps only `name` and `age` (dropping `city`).
        let second = source.value(1);
        let mut object_ext = values.builder_ext(second.metadata());
        object_ext
            .new_object()
            .with_field("name", second.get_object_field("name").unwrap())
            .with_field("age", second.get_object_field("age").unwrap())
            .finish();

        // Row 2: a list containing the original object twice.
        let third = source.value(2);
        let mut list_ext = values.builder_ext(third.metadata());
        list_ext
            .new_list()
            .with_value(third.clone())
            .with_value(third.clone())
            .finish();

        // Pair the rebuilt value column with the source array's metadata column.
        let rebuilt = VariantArray::from_parts(
            source.metadata_field().clone(),
            Some(values.build().unwrap()),
            None,
            None,
        );

        assert_eq!(rebuilt.len(), 3);
        assert_eq!(source.value(0), rebuilt.value(0));

        assert_eq!(
            source.value(1).get_object_field("name"),
            rebuilt.value(1).get_object_field("name")
        );
        assert_eq!(
            source.value(1).get_object_field("age"),
            rebuilt.value(1).get_object_field("age")
        );

        assert_eq!(
            source.value(2),
            rebuilt.value(2).get_list_element(0).unwrap()
        );
        assert_eq!(
            source.value(2),
            rebuilt.value(2).get_list_element(1).unwrap()
        );
    }
}