1use crate::StructArray;
19use crate::builder::*;
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Fields, SchemaBuilder};
22use std::sync::Arc;
23
24pub struct StructBuilder {
103 fields: Fields,
104 field_builders: Vec<Box<dyn ArrayBuilder>>,
105 null_buffer_builder: NullBufferBuilder,
106}
107
108impl std::fmt::Debug for StructBuilder {
109 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110 f.debug_struct("StructBuilder")
111 .field("fields", &self.fields)
112 .field("bitmap_builder", &self.null_buffer_builder)
113 .field("len", &self.len())
114 .finish()
115 }
116}
117
118impl ArrayBuilder for StructBuilder {
119 fn len(&self) -> usize {
125 self.null_buffer_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137
138 fn finish_preserve_values(&mut self) -> ArrayRef {
139 Arc::new(self.finish_preserve_values())
140 }
141
142 fn as_any(&self) -> &dyn Any {
148 self
149 }
150
151 fn as_any_mut(&mut self) -> &mut dyn Any {
157 self
158 }
159
160 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
162 self
163 }
164}
165
166impl StructBuilder {
167 pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
169 Self {
170 field_builders,
171 fields: fields.into(),
172 null_buffer_builder: NullBufferBuilder::new(0),
173 }
174 }
175
176 pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
178 let fields = fields.into();
179 let mut builders = Vec::with_capacity(fields.len());
180 for field in &fields {
181 builders.push(make_builder(field.data_type(), capacity));
182 }
183 Self::new(fields, builders)
184 }
185
186 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
190 self.field_builders[i].as_any_mut().downcast_mut::<T>()
191 }
192
193 pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] {
195 &self.field_builders
196 }
197
198 pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] {
200 &mut self.field_builders
201 }
202
203 pub fn num_fields(&self) -> usize {
205 self.field_builders.len()
206 }
207
208 pub fn fields(&self) -> &Fields {
210 &self.fields
211 }
212
213 #[inline]
216 pub fn append(&mut self, is_valid: bool) {
217 self.null_buffer_builder.append(is_valid);
218 }
219
220 #[inline]
222 pub fn append_non_nulls(&mut self, n: usize) {
223 self.null_buffer_builder.append_n_non_nulls(n);
224 }
225
226 #[inline]
228 pub fn append_null(&mut self) {
229 self.append(false)
230 }
231
232 #[inline]
234 pub fn append_nulls(&mut self, n: usize) {
235 self.null_buffer_builder.append_n_nulls(n);
236 }
237
238 pub fn finish(&mut self) -> StructArray {
240 self.validate_content();
241 if self.fields.is_empty() {
242 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
243 }
244
245 let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
246 let nulls = self.null_buffer_builder.finish();
247 StructArray::new(self.fields.clone(), arrays, nulls)
248 }
249
250 pub fn finish_cloned(&self) -> StructArray {
252 self.validate_content();
253
254 if self.fields.is_empty() {
255 return StructArray::new_empty_fields(
256 self.len(),
257 self.null_buffer_builder.finish_cloned(),
258 );
259 }
260
261 let arrays = self
262 .field_builders
263 .iter()
264 .map(|f| f.finish_cloned())
265 .collect();
266
267 let nulls = self.null_buffer_builder.finish_cloned();
268
269 StructArray::new(self.fields.clone(), arrays, nulls)
270 }
271
272 fn finish_preserve_values(&mut self) -> StructArray {
273 self.validate_content();
274 if self.fields.is_empty() {
275 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
276 }
277
278 let arrays = self
279 .field_builders
280 .iter_mut()
281 .map(|f| f.finish_preserve_values())
282 .collect();
283
284 let nulls = self.null_buffer_builder.finish();
285
286 StructArray::new(self.fields.clone(), arrays, nulls)
287 }
288
289 fn validate_content(&self) {
293 if self.fields.len() != self.field_builders.len() {
294 panic!("Number of fields is not equal to the number of field_builders.");
295 }
296 self.field_builders.iter().enumerate().for_each(|(idx, x)| {
297 if x.len() != self.len() {
298 let builder = SchemaBuilder::from(&self.fields);
299 let schema = builder.finish();
300
301 panic!("{}", format!(
302 "StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
303 schema,
304 idx,
305 self.fields[idx].data_type(),
306 self.len(),
307 x.len()
308 ));
309 }
310 });
311 }
312
313 pub fn validity_slice(&self) -> Option<&[u8]> {
315 self.null_buffer_builder.as_slice()
316 }
317}
318
#[cfg(test)]
mod tests {
    use std::any::type_name;

    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    use crate::{array::Array, builder::tests::PreserveValuesMock, types::ArrowDictionaryKeyType};

    /// Builds a `StructBuilder` with a non-nullable Int32 field `f1` and a
    /// non-nullable Boolean field `f2`, backed by the supplied child builders.
    fn int_bool_struct_builder(ints: Int32Builder, bools: BooleanBuilder) -> StructBuilder {
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children = vec![
            Box::new(ints) as Box<dyn ArrayBuilder>,
            Box::new(bools) as Box<dyn ArrayBuilder>,
        ];
        StructBuilder::new(fields, children)
    }

    /// Builds a seven-slot struct with nullable children and checks the
    /// resulting validity bitmaps and child data.
    #[test]
    fn test_struct_array_builder() {
        let fields = vec![
            Field::new("f1", DataType::Utf8, true),
            Field::new("f2", DataType::Int32, true),
        ];
        let children = vec![
            Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>,
            Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
        ];

        let mut builder = StructBuilder::new(fields, children);
        assert_eq!(2, builder.num_fields());

        let strings = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        strings.append_value("joe");
        strings.append_null();
        strings.append_null();
        strings.append_value("mark");
        strings.append_nulls(2);
        strings.append_value("terry");

        let ints = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        ints.append_value(1);
        ints.append_value(2);
        ints.append_null();
        ints.append_value(4);
        ints.append_nulls(2);
        ints.append_value(3);

        // Struct-level validity: valid, valid, null, valid, null, null, valid.
        builder.append(true);
        builder.append(true);
        builder.append_null();
        builder.append(true);

        builder.append_nulls(2);
        builder.append(true);

        let struct_data = builder.finish().into_data();

        assert_eq!(7, struct_data.len());
        assert_eq!(3, struct_data.null_count());
        assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());

        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(7)
            .null_bit_buffer(Some(Buffer::from(&[73_u8])))
            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
            .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
            .build()
            .unwrap();

        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(7)
            .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
            .build()
            .unwrap();

        assert_eq!(expected_string_data, struct_data.child_data()[0]);
        assert_eq!(expected_int_data, struct_data.child_data()[1]);

        assert!(struct_data.is_null(4));
        assert!(struct_data.is_null(5));
    }

    /// `finish` must hand out the accumulated slots and leave the builder empty
    /// and reusable.
    #[test]
    fn test_struct_array_builder_finish() {
        let mut builder = int_bool_struct_builder(Int32Builder::new(), BooleanBuilder::new());

        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[
                false, true, false, true, false, true, false, true, false, true,
            ]);
        (0..10).for_each(|_| builder.append(true));

        assert_eq!(10, builder.len());

        let first = builder.finish();

        assert_eq!(10, first.len());
        assert_eq!(0, builder.len());

        // Second round on the same (now reset) builder.
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[1, 3, 5, 7, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[false, true, false, true, false]);
        (0..5).for_each(|_| builder.append(true));

        assert_eq!(5, builder.len());

        let second = builder.finish();

        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    /// `make_builder` for a fixed-size list must yield a
    /// `FixedSizeListBuilder` whose values builder is an `Int32Builder`.
    #[test]
    fn test_build_fixed_size_list() {
        const LIST_LENGTH: i32 = 4;
        let fixed_size_list_dtype =
            DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
        let mut boxed = make_builder(&fixed_size_list_dtype, 10);
        let Some(list_builder) = boxed
            .as_any_mut()
            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>()
        else {
            panic!("expected FixedSizeListBuilder, got a different builder type")
        };

        assert_eq!(list_builder.value_length(), LIST_LENGTH);
        assert!(
            list_builder
                .values()
                .as_any_mut()
                .downcast_mut::<Int32Builder>()
                .is_some()
        );
    }

    /// `finish_cloned` must snapshot the contents without resetting the
    /// builder, so later appends extend the same data.
    #[test]
    fn test_struct_array_builder_finish_cloned() {
        let mut builder = int_bool_struct_builder(Int32Builder::new(), BooleanBuilder::new());

        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[
                false, true, false, true, false, true, false, true, false, true,
            ]);
        (0..10).for_each(|_| builder.append(true));

        assert_eq!(10, builder.len());

        let snapshot = builder.finish_cloned();

        assert_eq!(10, snapshot.len());
        assert_eq!(10, builder.len());

        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[1, 3, 5, 7, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[false, true, false, true, false]);
        (0..5).for_each(|_| builder.append(true));

        assert_eq!(15, builder.len());

        let finished = builder.finish();

        assert_eq!(15, finished.len());
        assert_eq!(0, builder.len());
    }

    /// `finish_preserve_values` must forward to the child builder exactly once.
    #[test]
    fn test_struct_array_builder_finish_preserve_values() {
        let fields = vec![Field::new("mock", DataType::Int32, false)];
        let children = vec![Box::new(PreserveValuesMock::default()) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, children);
        builder
            .field_builder::<PreserveValuesMock>(0)
            .unwrap()
            .inner
            .append_value(1);
        builder.append(true);

        assert_eq!(1, builder.len());

        let arr = builder.finish_preserve_values();
        assert_eq!(1, arr.len());

        let mock = builder.field_builder::<PreserveValuesMock>(0).unwrap();
        assert_eq!(1, mock.called);
    }

    /// `from_fields` must pick the matching builder type for each field,
    /// including nested structs.
    #[test]
    fn test_struct_array_builder_from_schema() {
        let nested = vec![
            Field::new("g1", DataType::Int32, false),
            Field::new("g2", DataType::Boolean, false),
        ];
        let fields = vec![
            Field::new("f1", DataType::Float32, false),
            Field::new("f2", DataType::Utf8, false),
            Field::new("f3", DataType::Struct(nested.into()), false),
        ];

        let mut builder = StructBuilder::from_fields(fields, 5);
        assert_eq!(3, builder.num_fields());
        assert!(builder.field_builder::<Float32Builder>(0).is_some());
        assert!(builder.field_builder::<StringBuilder>(1).is_some());
        assert!(builder.field_builder::<StructBuilder>(2).is_some());
    }

    /// Parameterised data types (decimal precision/scale, timestamp timezone)
    /// must survive the round trip through `from_fields` and `finish`.
    #[test]
    fn test_datatype_properties() {
        let fields = Fields::from(vec![
            Field::new("f1", DataType::Decimal128(1, 2), false),
            Field::new(
                "f2",
                DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
                false,
            ),
        ]);
        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
        builder
            .field_builder::<Decimal128Builder>(0)
            .unwrap()
            .append_value(1);
        builder
            .field_builder::<TimestampMillisecondBuilder>(1)
            .unwrap()
            .append_value(1);
        builder.append(true);
        let array = builder.finish();

        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
        assert_eq!(array.column(0).data_type(), fields[0].data_type());
        assert_eq!(array.column(1).data_type(), fields[1].data_type());
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int8_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int16_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int32_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int64_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64);
    }

    /// Shared body of the dictionary tests: a struct with a single
    /// `Dictionary(key_type, Utf8)` field must get a `StringDictionaryBuilder<K>`.
    fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
        key_type: DataType,
    ) {
        let dict_field = Field::new(
            "f1",
            DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)),
            false,
        );
        let expected_dtype = DataType::Struct(vec![dict_field.clone()].into());
        let expected_child_dtype = dict_field.data_type();

        let mut struct_builder = StructBuilder::from_fields(vec![dict_field.clone()], 5);
        let dict_builder = struct_builder
            .field_builder::<StringDictionaryBuilder<K>>(0)
            .unwrap_or_else(|| {
                panic!(
                    "Builder should be StringDictionaryBuilder<{}>",
                    type_name::<K>()
                )
            });
        dict_builder.append_value("dict string");
        struct_builder.append(true);
        let array = struct_builder.finish();

        assert_eq!(array.data_type(), &expected_dtype);
        assert_eq!(array.column(0).data_type(), expected_child_dtype);
        assert_eq!(array.column(0).len(), 1);
    }

    #[test]
    #[should_panic(
        expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
    )]
    fn test_struct_array_builder_from_schema_unsupported_type() {
        let unsupported = DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8));
        let fields = vec![
            Field::new("f1", DataType::UInt64, false),
            Field::new("f2", unsupported, false),
        ];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    #[test]
    #[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
        let unsupported = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
        let fields = vec![Field::new("f1", unsupported, false)];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// Asking for the wrong concrete builder type must yield `None`.
    #[test]
    fn test_struct_array_builder_field_builder_type_mismatch() {
        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let children = vec![Box::new(Int32Builder::with_capacity(10)) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, children);
        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
    }

    #[test]
    #[should_panic(
        expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
    )]
    fn test_struct_array_builder_unequal_field_builders_lengths() {
        let mut ints = Int32Builder::with_capacity(10);
        let mut bools = BooleanBuilder::new();

        // Two ints but only one bool: the second child is one slot short.
        ints.append_value(1);
        ints.append_value(2);
        bools.append_value(true);

        let mut builder = int_bool_struct_builder(ints, bools);
        builder.append(true);
        builder.append(true);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
    fn test_struct_array_builder_unequal_field_field_builders() {
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        // Only one child builder for two fields.
        let children = vec![Box::new(Int32Builder::with_capacity(10)) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, children);
        builder.finish();
    }

    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(ns, \\\"UTC\\\") got Timestamp(ns)"
    )]
    fn test_struct_array_mismatch_builder() {
        let with_timezone =
            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into()));
        let fields = vec![Field::new("timestamp", with_timezone, false)];

        // The child builder carries no timezone, so its type differs from the field's.
        let children: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(TimestampNanosecondBuilder::new())];

        let mut sa = StructBuilder::new(fields, children);
        sa.finish();
    }

    /// A struct without fields still tracks length and validity.
    #[test]
    fn test_empty() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append(true);
        builder.append(false);

        let cloned = builder.finish_cloned();
        let finished = builder.finish();
        assert_eq!(cloned, finished);
        assert_eq!(cloned.len(), 2);
        assert_eq!(cloned.null_count(), 1);
        assert!(cloned.is_valid(0));
        assert!(cloned.is_null(1));
    }

    #[test]
    fn test_append_non_nulls() {
        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let children = vec![Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, children);
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[1, 2, 3, 4, 5]);
        builder.append_non_nulls(5);

        let arr = builder.finish();
        assert_eq!(arr.len(), 5);
        assert_eq!(arr.null_count(), 0);
        (0..5).for_each(|i| assert!(arr.is_valid(i)));
    }

    #[test]
    fn test_append_non_nulls_with_nulls() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        // Layout: null, valid x3, null x2, valid.
        builder.append_null();
        builder.append_non_nulls(3);
        builder.append_nulls(2);
        builder.append_non_nulls(1);

        let arr = builder.finish();
        assert_eq!(arr.len(), 7);
        assert_eq!(arr.null_count(), 3);
        assert!(arr.is_null(0));
        assert!(arr.is_valid(1));
        assert!(arr.is_valid(2));
        assert!(arr.is_valid(3));
        assert!(arr.is_null(4));
        assert!(arr.is_null(5));
        assert!(arr.is_valid(6));
    }

    /// Appending zero non-null slots must be a no-op.
    #[test]
    fn test_append_non_nulls_zero() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append_non_nulls(0);
        assert_eq!(builder.len(), 0);

        builder.append(true);
        builder.append_non_nulls(0);
        assert_eq!(builder.len(), 1);

        let arr = builder.finish();
        assert_eq!(arr.len(), 1);
        assert_eq!(arr.null_count(), 0);
    }
}