1use crate::StructArray;
19use crate::builder::*;
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Fields, SchemaBuilder};
22use std::sync::Arc;
23
24pub struct StructBuilder {
103 fields: Fields,
104 field_builders: Vec<Box<dyn ArrayBuilder>>,
105 null_buffer_builder: NullBufferBuilder,
106}
107
108impl std::fmt::Debug for StructBuilder {
109 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110 f.debug_struct("StructBuilder")
111 .field("fields", &self.fields)
112 .field("bitmap_builder", &self.null_buffer_builder)
113 .field("len", &self.len())
114 .finish()
115 }
116}
117
118impl ArrayBuilder for StructBuilder {
119 fn len(&self) -> usize {
125 self.null_buffer_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137
138 fn as_any(&self) -> &dyn Any {
144 self
145 }
146
147 fn as_any_mut(&mut self) -> &mut dyn Any {
153 self
154 }
155
156 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
158 self
159 }
160}
161
162impl StructBuilder {
163 pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
165 Self {
166 field_builders,
167 fields: fields.into(),
168 null_buffer_builder: NullBufferBuilder::new(0),
169 }
170 }
171
172 pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
174 let fields = fields.into();
175 let mut builders = Vec::with_capacity(fields.len());
176 for field in &fields {
177 builders.push(make_builder(field.data_type(), capacity));
178 }
179 Self::new(fields, builders)
180 }
181
182 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
186 self.field_builders[i].as_any_mut().downcast_mut::<T>()
187 }
188
189 pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] {
191 &self.field_builders
192 }
193
194 pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] {
196 &mut self.field_builders
197 }
198
199 pub fn num_fields(&self) -> usize {
201 self.field_builders.len()
202 }
203
204 pub fn fields(&self) -> &Fields {
206 &self.fields
207 }
208
209 #[inline]
212 pub fn append(&mut self, is_valid: bool) {
213 self.null_buffer_builder.append(is_valid);
214 }
215
216 #[inline]
218 pub fn append_null(&mut self) {
219 self.append(false)
220 }
221
222 #[inline]
224 pub fn append_nulls(&mut self, n: usize) {
225 self.null_buffer_builder.append_slice(&vec![false; n]);
226 }
227
228 pub fn finish(&mut self) -> StructArray {
230 self.validate_content();
231 if self.fields.is_empty() {
232 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
233 }
234
235 let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
236 let nulls = self.null_buffer_builder.finish();
237 StructArray::new(self.fields.clone(), arrays, nulls)
238 }
239
240 pub fn finish_cloned(&self) -> StructArray {
242 self.validate_content();
243
244 if self.fields.is_empty() {
245 return StructArray::new_empty_fields(
246 self.len(),
247 self.null_buffer_builder.finish_cloned(),
248 );
249 }
250
251 let arrays = self
252 .field_builders
253 .iter()
254 .map(|f| f.finish_cloned())
255 .collect();
256
257 let nulls = self.null_buffer_builder.finish_cloned();
258
259 StructArray::new(self.fields.clone(), arrays, nulls)
260 }
261
262 fn validate_content(&self) {
266 if self.fields.len() != self.field_builders.len() {
267 panic!("Number of fields is not equal to the number of field_builders.");
268 }
269 self.field_builders.iter().enumerate().for_each(|(idx, x)| {
270 if x.len() != self.len() {
271 let builder = SchemaBuilder::from(&self.fields);
272 let schema = builder.finish();
273
274 panic!("{}", format!(
275 "StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
276 schema,
277 idx,
278 self.fields[idx].data_type(),
279 self.len(),
280 x.len()
281 ));
282 }
283 });
284 }
285
286 pub fn validity_slice(&self) -> Option<&[u8]> {
288 self.null_buffer_builder.as_slice()
289 }
290}
291
#[cfg(test)]
mod tests {
    use std::any::type_name;

    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    use crate::{array::Array, types::ArrowDictionaryKeyType};

    /// Creates an empty builder for a struct with non-nullable fields
    /// `f1: Int32` and `f2: Boolean`.
    fn new_int_bool_builder() -> StructBuilder {
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![
            Box::new(Int32Builder::new()),
            Box::new(BooleanBuilder::new()),
        ];
        StructBuilder::new(fields, children)
    }

    /// Appends `ints` and `bools` to the two child builders and marks one
    /// valid struct slot per element of `ints`.
    fn push_valid_rows(builder: &mut StructBuilder, ints: &[i32], bools: &[bool]) {
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(ints);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(bools);

        for _ in 0..ints.len() {
            builder.append(true);
        }
    }

    /// Builds a two-field struct with nulls at both the child and the struct
    /// level and checks the resulting data buffers.
    #[test]
    fn test_struct_array_builder() {
        let schema = vec![
            Field::new("f1", DataType::Utf8, true),
            Field::new("f2", DataType::Int32, true),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![
            Box::new(StringBuilder::new()),
            Box::new(Int32Builder::new()),
        ];

        let mut builder = StructBuilder::new(schema, children);
        assert_eq!(2, builder.num_fields());

        // f1: "joe", null, null, "mark", null, null, "terry"
        let names = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        names.append_value("joe");
        names.append_null();
        names.append_null();
        names.append_value("mark");
        names.append_nulls(2);
        names.append_value("terry");

        // f2: 1, 2, null, 4, null, null, 3
        let numbers = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        numbers.append_value(1);
        numbers.append_value(2);
        numbers.append_null();
        numbers.append_value(4);
        numbers.append_nulls(2);
        numbers.append_value(3);

        // struct validity: valid, valid, null, valid, null, null, valid
        builder.append(true);
        builder.append(true);
        builder.append_null();
        builder.append(true);

        builder.append_nulls(2);
        builder.append(true);

        let struct_data = builder.finish().into_data();

        assert_eq!(7, struct_data.len());
        assert_eq!(3, struct_data.null_count());
        assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());

        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(7)
            .null_bit_buffer(Some(Buffer::from(&[73_u8])))
            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
            .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
            .build()
            .unwrap();

        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(7)
            .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
            .build()
            .unwrap();

        let child_data = struct_data.child_data();
        assert_eq!(expected_string_data, child_data[0]);
        assert_eq!(expected_int_data, child_data[1]);

        assert!(struct_data.is_null(4));
        assert!(struct_data.is_null(5));
    }

    /// `finish` yields the accumulated rows and leaves the builder empty and
    /// reusable.
    #[test]
    fn test_struct_array_builder_finish() {
        let mut builder = new_int_bool_builder();

        push_valid_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        let first = builder.finish();
        assert_eq!(10, first.len());
        assert_eq!(0, builder.len());

        push_valid_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(5, builder.len());

        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    /// `make_builder` for a fixed-size list of Int32 produces a
    /// `FixedSizeListBuilder` wrapping an `Int32Builder`.
    #[test]
    fn test_build_fixed_size_list() {
        const LIST_LENGTH: i32 = 4;
        let fixed_size_list_dtype =
            DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
        let mut boxed = make_builder(&fixed_size_list_dtype, 10);

        let Some(list_builder) = boxed
            .as_any_mut()
            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>()
        else {
            panic!("expected FixedSizeListBuilder, got a different builder type")
        };

        assert_eq!(list_builder.value_length(), LIST_LENGTH);
        assert!(
            list_builder
                .values()
                .as_any_mut()
                .downcast_mut::<Int32Builder>()
                .is_some()
        );
    }

    /// `finish_cloned` snapshots the rows without resetting the builder; a
    /// later `finish` sees everything appended so far.
    #[test]
    fn test_struct_array_builder_finish_cloned() {
        let mut builder = new_int_bool_builder();

        push_valid_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        let snapshot = builder.finish_cloned();
        assert_eq!(10, snapshot.len());
        assert_eq!(10, builder.len());

        push_valid_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(15, builder.len());

        let finished = builder.finish();
        assert_eq!(15, finished.len());
        assert_eq!(0, builder.len());
    }

    /// `from_fields` picks a matching child builder for each field,
    /// including a nested struct.
    #[test]
    fn test_struct_array_builder_from_schema() {
        let nested_type = DataType::Struct(
            vec![
                Field::new("g1", DataType::Int32, false),
                Field::new("g2", DataType::Boolean, false),
            ]
            .into(),
        );
        let fields = vec![
            Field::new("f1", DataType::Float32, false),
            Field::new("f2", DataType::Utf8, false),
            Field::new("f3", nested_type, false),
        ];

        let mut builder = StructBuilder::from_fields(fields, 5);
        assert_eq!(3, builder.num_fields());
        assert!(builder.field_builder::<Float32Builder>(0).is_some());
        assert!(builder.field_builder::<StringBuilder>(1).is_some());
        assert!(builder.field_builder::<StructBuilder>(2).is_some());
    }

    /// Parameterised data types (decimal precision/scale, timestamp time
    /// zone) are carried through to the finished columns.
    #[test]
    fn test_datatype_properties() {
        let decimal_field = Field::new("f1", DataType::Decimal128(1, 2), false);
        let timestamp_field = Field::new(
            "f2",
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            false,
        );
        let fields = Fields::from(vec![decimal_field, timestamp_field]);

        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
        builder
            .field_builder::<Decimal128Builder>(0)
            .unwrap()
            .append_value(1);
        builder
            .field_builder::<TimestampMillisecondBuilder>(1)
            .unwrap()
            .append_value(1);
        builder.append(true);
        let array = builder.finish();

        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
        assert_eq!(array.column(0).data_type(), fields[0].data_type());
        assert_eq!(array.column(1).data_type(), fields[1].data_type());
    }

    /// Dictionary child with `Int8` keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int8_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8);
    }

    /// Dictionary child with `Int16` keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int16_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16);
    }

    /// Dictionary child with `Int32` keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int32_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32);
    }

    /// Dictionary child with `Int64` keys.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int64_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64);
    }

    /// Shared body of the dictionary-key tests: builds a struct with a single
    /// `Dictionary(key_type, Utf8)` field and checks the resulting types.
    fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
        key_type: DataType,
    ) {
        let child_dtype = DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8));
        let dict_field = Field::new("f1", child_dtype.clone(), false);
        let expected_dtype = DataType::Struct(vec![dict_field.clone()].into());

        let mut struct_builder = StructBuilder::from_fields(vec![dict_field], 5);
        match struct_builder.field_builder::<StringDictionaryBuilder<K>>(0) {
            Some(dict_builder) => {
                dict_builder.append_value("dict string");
            }
            None => panic!(
                "Builder should be StringDictionaryBuilder<{}>",
                type_name::<K>()
            ),
        }
        struct_builder.append(true);
        let array = struct_builder.finish();

        assert_eq!(array.data_type(), &expected_dtype);
        assert_eq!(array.column(0).data_type(), &child_dtype);
        assert_eq!(array.column(0).len(), 1);
    }

    /// An unsupported dictionary key type makes `from_fields` panic.
    #[test]
    #[should_panic(
        expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
    )]
    fn test_struct_array_builder_from_schema_unsupported_type() {
        let unsupported_dict =
            DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8));
        let fields = vec![
            Field::new("f1", DataType::UInt64, false),
            Field::new("f2", unsupported_dict, false),
        ];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// An unsupported dictionary value type makes `from_fields` panic.
    #[test]
    #[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
        let unsupported_dict =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
        let fields = vec![Field::new("f1", unsupported_dict, false)];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// Asking for a child builder with the wrong concrete type yields `None`.
    #[test]
    fn test_struct_array_builder_field_builder_type_mismatch() {
        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let children: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(fields, children);
        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
    }

    /// A child builder whose length differs from the struct's makes `finish`
    /// panic with a descriptive message.
    #[test]
    #[should_panic(
        expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
    )]
    fn test_struct_array_builder_unequal_field_builders_lengths() {
        let mut ints = Int32Builder::with_capacity(10);
        ints.append_value(1);
        ints.append_value(2);

        let mut bools = BooleanBuilder::new();
        bools.append_value(true);

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(ints), Box::new(bools)];

        let mut builder = StructBuilder::new(fields, children);
        builder.append(true);
        builder.append(true);
        builder.finish();
    }

    /// Fewer child builders than fields makes `finish` panic.
    #[test]
    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
    fn test_struct_array_builder_unequal_field_field_builders() {
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(fields, children);
        builder.finish();
    }

    /// A child builder producing a data type different from the declared
    /// field type (missing time zone) makes `finish` panic.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(ns, \\\"UTC\\\") got Timestamp(ns)"
    )]
    fn test_struct_array_mismatch_builder() {
        let utc_timestamp =
            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into()));
        let fields = vec![Field::new("timestamp", utc_timestamp, false)];

        let children: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(TimestampNanosecondBuilder::new())];

        let mut sa = StructBuilder::new(fields, children);
        sa.finish();
    }

    /// A struct without fields still tracks length and validity.
    #[test]
    fn test_empty() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append(true);
        builder.append(false);

        let cloned = builder.finish_cloned();
        let finished = builder.finish();
        assert_eq!(cloned, finished);
        assert_eq!(cloned.len(), 2);
        assert_eq!(cloned.null_count(), 1);
        assert!(cloned.is_valid(0));
        assert!(cloned.is_null(1));
    }
}