1use crate::StructArray;
19use crate::builder::*;
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Fields, SchemaBuilder};
22use std::sync::Arc;
23
24pub struct StructBuilder {
103 fields: Fields,
104 field_builders: Vec<Box<dyn ArrayBuilder>>,
105 null_buffer_builder: NullBufferBuilder,
106}
107
108impl std::fmt::Debug for StructBuilder {
109 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110 f.debug_struct("StructBuilder")
111 .field("fields", &self.fields)
112 .field("bitmap_builder", &self.null_buffer_builder)
113 .field("len", &self.len())
114 .finish()
115 }
116}
117
118impl ArrayBuilder for StructBuilder {
119 fn len(&self) -> usize {
125 self.null_buffer_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137
138 fn as_any(&self) -> &dyn Any {
144 self
145 }
146
147 fn as_any_mut(&mut self) -> &mut dyn Any {
153 self
154 }
155
156 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
158 self
159 }
160}
161
162impl StructBuilder {
163 pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
165 Self {
166 field_builders,
167 fields: fields.into(),
168 null_buffer_builder: NullBufferBuilder::new(0),
169 }
170 }
171
172 pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
174 let fields = fields.into();
175 let mut builders = Vec::with_capacity(fields.len());
176 for field in &fields {
177 builders.push(make_builder(field.data_type(), capacity));
178 }
179 Self::new(fields, builders)
180 }
181
182 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
186 self.field_builders[i].as_any_mut().downcast_mut::<T>()
187 }
188
189 pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] {
191 &self.field_builders
192 }
193
194 pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] {
196 &mut self.field_builders
197 }
198
199 pub fn num_fields(&self) -> usize {
201 self.field_builders.len()
202 }
203
204 pub fn fields(&self) -> &Fields {
206 &self.fields
207 }
208
209 #[inline]
212 pub fn append(&mut self, is_valid: bool) {
213 self.null_buffer_builder.append(is_valid);
214 }
215
216 #[inline]
218 pub fn append_non_nulls(&mut self, n: usize) {
219 self.null_buffer_builder.append_n_non_nulls(n);
220 }
221
222 #[inline]
224 pub fn append_null(&mut self) {
225 self.append(false)
226 }
227
228 #[inline]
230 pub fn append_nulls(&mut self, n: usize) {
231 self.null_buffer_builder.append_n_nulls(n);
232 }
233
234 pub fn finish(&mut self) -> StructArray {
236 self.validate_content();
237 if self.fields.is_empty() {
238 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
239 }
240
241 let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
242 let nulls = self.null_buffer_builder.finish();
243 StructArray::new(self.fields.clone(), arrays, nulls)
244 }
245
246 pub fn finish_cloned(&self) -> StructArray {
248 self.validate_content();
249
250 if self.fields.is_empty() {
251 return StructArray::new_empty_fields(
252 self.len(),
253 self.null_buffer_builder.finish_cloned(),
254 );
255 }
256
257 let arrays = self
258 .field_builders
259 .iter()
260 .map(|f| f.finish_cloned())
261 .collect();
262
263 let nulls = self.null_buffer_builder.finish_cloned();
264
265 StructArray::new(self.fields.clone(), arrays, nulls)
266 }
267
268 fn validate_content(&self) {
272 if self.fields.len() != self.field_builders.len() {
273 panic!("Number of fields is not equal to the number of field_builders.");
274 }
275 self.field_builders.iter().enumerate().for_each(|(idx, x)| {
276 if x.len() != self.len() {
277 let builder = SchemaBuilder::from(&self.fields);
278 let schema = builder.finish();
279
280 panic!("{}", format!(
281 "StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
282 schema,
283 idx,
284 self.fields[idx].data_type(),
285 self.len(),
286 x.len()
287 ));
288 }
289 });
290 }
291
292 pub fn validity_slice(&self) -> Option<&[u8]> {
294 self.null_buffer_builder.as_slice()
295 }
296}
297
298#[cfg(test)]
299mod tests {
300 use std::any::type_name;
301
302 use super::*;
303 use arrow_buffer::Buffer;
304 use arrow_data::ArrayData;
305 use arrow_schema::Field;
306
307 use crate::{array::Array, types::ArrowDictionaryKeyType};
308
/// Builds a two-column struct (`Utf8`, `Int32`) with nulls at both the child
/// and the struct level, then checks length, null count, validity bits and the
/// child data produced by `finish`.
#[test]
fn test_struct_array_builder() {
    let schema = vec![
        Field::new("f1", DataType::Utf8, true),
        Field::new("f2", DataType::Int32, true),
    ];
    let children = vec![
        Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>,
        Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
    ];

    let mut builder = StructBuilder::new(schema, children);
    assert_eq!(2, builder.num_fields());

    // Column f1: "joe", null, null, "mark", null, null, "terry"
    {
        let names = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        names.append_value("joe");
        names.append_null();
        names.append_null();
        names.append_value("mark");
        names.append_nulls(2);
        names.append_value("terry");
    }

    // Column f2: 1, 2, null, 4, null, null, 3
    {
        let numbers = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        numbers.append_value(1);
        numbers.append_value(2);
        numbers.append_null();
        numbers.append_value(4);
        numbers.append_nulls(2);
        numbers.append_value(3);
    }

    // Struct-level validity: valid, valid, null, valid, null, null, valid
    builder.append(true);
    builder.append(true);
    builder.append_null();
    builder.append(true);

    builder.append_nulls(2);
    builder.append(true);

    let struct_data = builder.finish().into_data();

    assert_eq!(7, struct_data.len());
    assert_eq!(3, struct_data.null_count());
    assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());

    let string_child = ArrayData::builder(DataType::Utf8)
        .len(7)
        .null_bit_buffer(Some(Buffer::from(&[73_u8])))
        .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
        .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
        .build()
        .unwrap();
    assert_eq!(string_child, struct_data.child_data()[0]);

    let int_child = ArrayData::builder(DataType::Int32)
        .len(7)
        .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
        .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
        .build()
        .unwrap();
    assert_eq!(int_child, struct_data.child_data()[1]);

    assert!(struct_data.is_null(4));
    assert!(struct_data.is_null(5));
}
381
/// `finish` returns the accumulated rows and leaves the builder empty, so it
/// can be filled and finished a second time.
#[test]
fn test_struct_array_builder_finish() {
    let schema = vec![
        Field::new("f1", DataType::Int32, false),
        Field::new("f2", DataType::Boolean, false),
    ];
    let children = vec![
        Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
        Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>,
    ];
    let mut builder = StructBuilder::new(schema, children);

    // First batch: ten rows.
    let ints = builder.field_builder::<Int32Builder>(0).unwrap();
    ints.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
    let bools = builder.field_builder::<BooleanBuilder>(1).unwrap();
    bools.append_slice(&[
        false, true, false, true, false, true, false, true, false, true,
    ]);
    (0..10).for_each(|_| builder.append(true));
    assert_eq!(10, builder.len());

    let first = builder.finish();
    assert_eq!(10, first.len());
    assert_eq!(0, builder.len());

    // Second batch: five rows on the same, now empty, builder.
    let ints = builder.field_builder::<Int32Builder>(0).unwrap();
    ints.append_slice(&[1, 3, 5, 7, 9]);
    let bools = builder.field_builder::<BooleanBuilder>(1).unwrap();
    bools.append_slice(&[false, true, false, true, false]);
    (0..5).for_each(|_| builder.append(true));
    assert_eq!(5, builder.len());

    let second = builder.finish();
    assert_eq!(5, second.len());
    assert_eq!(0, builder.len());
}
441
/// `make_builder` for a fixed-size list type yields a `FixedSizeListBuilder`
/// with the requested value length and an `Int32Builder` for its values.
#[test]
fn test_build_fixed_size_list() {
    const LIST_LENGTH: i32 = 4;
    let list_type = DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
    let mut boxed = make_builder(&list_type, 10);

    let Some(list_builder) = boxed
        .as_any_mut()
        .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>()
    else {
        panic!("expected FixedSizeListBuilder, got a different builder type")
    };

    assert_eq!(list_builder.value_length(), LIST_LENGTH);
    let values_are_int32 = list_builder
        .values()
        .as_any_mut()
        .downcast_mut::<Int32Builder>()
        .is_some();
    assert!(values_are_int32);
}
465
/// `finish_cloned` snapshots the current rows without resetting the builder;
/// a later `finish` returns everything appended so far and empties it.
#[test]
fn test_struct_array_builder_finish_cloned() {
    let schema = vec![
        Field::new("f1", DataType::Int32, false),
        Field::new("f2", DataType::Boolean, false),
    ];
    let children = vec![
        Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
        Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>,
    ];
    let mut builder = StructBuilder::new(schema, children);

    // Ten rows, then a non-destructive snapshot.
    let ints = builder.field_builder::<Int32Builder>(0).unwrap();
    ints.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
    let bools = builder.field_builder::<BooleanBuilder>(1).unwrap();
    bools.append_slice(&[
        false, true, false, true, false, true, false, true, false, true,
    ]);
    (0..10).for_each(|_| builder.append(true));
    assert_eq!(10, builder.len());

    let snapshot = builder.finish_cloned();
    assert_eq!(10, snapshot.len());
    assert_eq!(10, builder.len());

    // Five more rows on top of the ten that are still in the builder.
    let ints = builder.field_builder::<Int32Builder>(0).unwrap();
    ints.append_slice(&[1, 3, 5, 7, 9]);
    let bools = builder.field_builder::<BooleanBuilder>(1).unwrap();
    bools.append_slice(&[false, true, false, true, false]);
    (0..5).for_each(|_| builder.append(true));
    assert_eq!(15, builder.len());

    let finished = builder.finish();
    assert_eq!(15, finished.len());
    assert_eq!(0, builder.len());
}
525
/// `from_fields` creates a child builder of the matching concrete type for
/// each field, including a nested struct.
#[test]
fn test_struct_array_builder_from_schema() {
    let nested = DataType::Struct(
        vec![
            Field::new("g1", DataType::Int32, false),
            Field::new("g2", DataType::Boolean, false),
        ]
        .into(),
    );
    let schema = vec![
        Field::new("f1", DataType::Float32, false),
        Field::new("f2", DataType::Utf8, false),
        Field::new("f3", nested, false),
    ];

    let mut builder = StructBuilder::from_fields(schema, 5);
    assert_eq!(3, builder.num_fields());

    let has_float = builder.field_builder::<Float32Builder>(0).is_some();
    assert!(has_float);
    let has_string = builder.field_builder::<StringBuilder>(1).is_some();
    assert!(has_string);
    let has_struct = builder.field_builder::<StructBuilder>(2).is_some();
    assert!(has_struct);
}
545
/// Parameterised data types (decimal precision/scale, timestamp time zone)
/// carry through from the fields to the finished array and its columns.
#[test]
fn test_datatype_properties() {
    let decimal = Field::new("f1", DataType::Decimal128(1, 2), false);
    let timestamp = Field::new(
        "f2",
        DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
        false,
    );
    let fields = Fields::from(vec![decimal, timestamp]);

    let mut builder = StructBuilder::from_fields(fields.clone(), 1);
    let decimals = builder.field_builder::<Decimal128Builder>(0).unwrap();
    decimals.append_value(1);
    let timestamps = builder
        .field_builder::<TimestampMillisecondBuilder>(1)
        .unwrap();
    timestamps.append_value(1);
    builder.append(true);

    let array = builder.finish();

    assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
    for (idx, field) in fields.iter().enumerate() {
        assert_eq!(array.column(idx).data_type(), field.data_type());
    }
}
572
/// Dictionary-typed struct child with `Int8` keys.
#[test]
fn test_struct_array_builder_from_dictionary_type_int8_key() {
    let key_type = DataType::Int8;
    test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(key_type);
}
577
/// Dictionary-typed struct child with `Int16` keys.
#[test]
fn test_struct_array_builder_from_dictionary_type_int16_key() {
    let key_type = DataType::Int16;
    test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(key_type);
}
582
/// Dictionary-typed struct child with `Int32` keys.
#[test]
fn test_struct_array_builder_from_dictionary_type_int32_key() {
    let key_type = DataType::Int32;
    test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(key_type);
}
587
/// Dictionary-typed struct child with `Int64` keys.
#[test]
fn test_struct_array_builder_from_dictionary_type_int64_key() {
    let key_type = DataType::Int64;
    test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(key_type);
}
592
593 fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
594 key_type: DataType,
595 ) {
596 let dict_field = Field::new(
597 "f1",
598 DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)),
599 false,
600 );
601 let fields = vec![dict_field.clone()];
602 let expected_dtype = DataType::Struct(fields.into());
603 let cloned_dict_field = dict_field.clone();
604 let expected_child_dtype = dict_field.data_type();
605 let mut struct_builder = StructBuilder::from_fields(vec![cloned_dict_field], 5);
606 let Some(dict_builder) = struct_builder.field_builder::<StringDictionaryBuilder<K>>(0)
607 else {
608 panic!(
609 "Builder should be StringDictionaryBuilder<{}>",
610 type_name::<K>()
611 )
612 };
613 dict_builder.append_value("dict string");
614 struct_builder.append(true);
615 let array = struct_builder.finish();
616
617 assert_eq!(array.data_type(), &expected_dtype);
618 assert_eq!(array.column(0).data_type(), expected_child_dtype);
619 assert_eq!(array.column(0).len(), 1);
620 }
621
/// `from_fields` panics when a field is a dictionary with an unsupported key
/// type.
#[test]
#[should_panic(
    expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
    let plain = Field::new("f1", DataType::UInt64, false);
    let unsupported_dict = Field::new(
        "f2",
        DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
        false,
    );

    // Building the child builders is expected to panic.
    let _ = StructBuilder::from_fields(vec![plain, unsupported_dict], 5);
}
638
/// `from_fields` panics when a dictionary field has an unsupported value type.
#[test]
#[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
    let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
    let fields = vec![Field::new("f1", dict_type, false)];

    // Building the child builder is expected to panic.
    let _ = StructBuilder::from_fields(fields, 5);
}
650
/// Asking `field_builder` for a type other than the stored builder's concrete
/// type yields `None`.
#[test]
fn test_struct_array_builder_field_builder_type_mismatch() {
    let fields = vec![Field::new("f1", DataType::Int32, false)];
    let children = vec![Box::new(Int32Builder::with_capacity(10)) as Box<dyn ArrayBuilder>];

    let mut builder = StructBuilder::new(fields, children);
    let wrong_type = builder.field_builder::<BinaryBuilder>(0);
    assert!(wrong_type.is_none());
}
661
/// `finish` panics when a child builder's length differs from the struct
/// builder's length.
#[test]
#[should_panic(
    expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
)]
fn test_struct_array_builder_unequal_field_builders_lengths() {
    // Two ints but only one bool: the second child ends up one row short.
    let mut ints = Int32Builder::with_capacity(10);
    ints.append_value(1);
    ints.append_value(2);

    let mut bools = BooleanBuilder::new();
    bools.append_value(true);

    let fields = vec![
        Field::new("f1", DataType::Int32, false),
        Field::new("f2", DataType::Boolean, false),
    ];
    let children = vec![
        Box::new(ints) as Box<dyn ArrayBuilder>,
        Box::new(bools) as Box<dyn ArrayBuilder>,
    ];

    let mut builder = StructBuilder::new(fields, children);
    for _ in 0..2 {
        builder.append(true);
    }
    builder.finish();
}
688
/// `finish` panics when there are more fields than child builders.
#[test]
#[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
fn test_struct_array_builder_unequal_field_field_builders() {
    let fields = vec![
        Field::new("f1", DataType::Int32, false),
        Field::new("f2", DataType::Boolean, false),
    ];
    // Only one builder for two fields.
    let children = vec![Box::new(Int32Builder::with_capacity(10)) as Box<dyn ArrayBuilder>];

    let mut builder = StructBuilder::new(fields, children);
    builder.finish();
}
703
/// `finish` panics when a child builder produces a data type different from
/// its field (here: timestamp with vs. without a time zone).
#[test]
#[should_panic(
    expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(ns, \\\"UTC\\\") got Timestamp(ns)"
)]
fn test_struct_array_mismatch_builder() {
    let with_timezone = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into()));
    let fields = vec![Field::new("timestamp", with_timezone, false)];

    // The builder carries no time zone, so its output type will not match.
    let children: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(TimestampNanosecondBuilder::new())];

    let mut sa = StructBuilder::new(fields, children);
    sa.finish();
}
721
/// A struct with no fields still tracks length and validity, and
/// `finish_cloned` agrees with `finish`.
#[test]
fn test_empty() {
    let mut builder = StructBuilder::new(Fields::empty(), vec![]);
    for is_valid in [true, false] {
        builder.append(is_valid);
    }

    let cloned = builder.finish_cloned();
    let finished = builder.finish();
    assert_eq!(cloned, finished);

    assert_eq!(cloned.len(), 2);
    assert_eq!(cloned.null_count(), 1);
    assert!(cloned.is_valid(0));
    assert!(cloned.is_null(1));
}
736
/// `append_non_nulls(n)` adds `n` valid struct slots in a single call.
#[test]
fn test_append_non_nulls() {
    let fields = vec![Field::new("f1", DataType::Int32, false)];
    let children = vec![Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>];
    let mut builder = StructBuilder::new(fields, children);

    let ints = builder.field_builder::<Int32Builder>(0).unwrap();
    ints.append_slice(&[1, 2, 3, 4, 5]);
    builder.append_non_nulls(5);

    let arr = builder.finish();
    assert_eq!(arr.len(), 5);
    assert_eq!(arr.null_count(), 0);
    assert!((0..5).all(|i| arr.is_valid(i)));
}
757
/// Interleaving `append_null`, `append_nulls` and `append_non_nulls` produces
/// the expected per-slot validity.
#[test]
fn test_append_non_nulls_with_nulls() {
    let mut builder = StructBuilder::new(Fields::empty(), vec![]);
    builder.append_null();
    builder.append_non_nulls(3);
    builder.append_nulls(2);
    builder.append_non_nulls(1);

    let arr = builder.finish();
    assert_eq!(arr.len(), 7);
    assert_eq!(arr.null_count(), 3);

    // Slot-by-slot validity in append order.
    let expected_valid = [false, true, true, true, false, false, true];
    for (slot, valid) in expected_valid.into_iter().enumerate() {
        if valid {
            assert!(arr.is_valid(slot));
        } else {
            assert!(arr.is_null(slot));
        }
    }
}
777
/// `append_non_nulls(0)` leaves the length unchanged, both on an empty builder
/// and after a row has been appended.
#[test]
fn test_append_non_nulls_zero() {
    let mut builder = StructBuilder::new(Fields::empty(), vec![]);

    // No-op on an empty builder.
    builder.append_non_nulls(0);
    assert_eq!(builder.len(), 0);

    // No-op after one valid row.
    builder.append(true);
    builder.append_non_nulls(0);
    assert_eq!(builder.len(), 1);

    let finished = builder.finish();
    assert_eq!(finished.len(), 1);
    assert_eq!(finished.null_count(), 0);
}
792}