1use crate::builder::*;
19use crate::StructArray;
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Fields, SchemaBuilder};
22use std::sync::Arc;
23
/// Builder that assembles a [`StructArray`] from per-field child builders.
///
/// The builder keeps one boxed [`ArrayBuilder`] per entry of `fields` and a
/// separate validity builder for the struct slots themselves. Callers append
/// values to the child builders and, independently, one validity flag per
/// struct slot; the lengths are only cross-checked when the array is finished.
pub struct StructBuilder {
    /// Field definitions of the struct being built.
    fields: Fields,
    /// Child builders, matched to `fields` by position.
    field_builders: Vec<Box<dyn ArrayBuilder>>,
    /// Validity of each struct slot; its length is reported as the builder's length.
    null_buffer_builder: NullBufferBuilder,
}
107
108impl std::fmt::Debug for StructBuilder {
109 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110 f.debug_struct("StructBuilder")
111 .field("fields", &self.fields)
112 .field("bitmap_builder", &self.null_buffer_builder)
113 .field("len", &self.len())
114 .finish()
115 }
116}
117
118impl ArrayBuilder for StructBuilder {
119 fn len(&self) -> usize {
125 self.null_buffer_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137
138 fn as_any(&self) -> &dyn Any {
144 self
145 }
146
147 fn as_any_mut(&mut self) -> &mut dyn Any {
153 self
154 }
155
156 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
158 self
159 }
160}
161
162impl StructBuilder {
163 pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
165 Self {
166 field_builders,
167 fields: fields.into(),
168 null_buffer_builder: NullBufferBuilder::new(0),
169 }
170 }
171
172 pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
174 let fields = fields.into();
175 let mut builders = Vec::with_capacity(fields.len());
176 for field in &fields {
177 builders.push(make_builder(field.data_type(), capacity));
178 }
179 Self::new(fields, builders)
180 }
181
182 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
186 self.field_builders[i].as_any_mut().downcast_mut::<T>()
187 }
188
189 pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] {
191 &self.field_builders
192 }
193
194 pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] {
196 &mut self.field_builders
197 }
198
199 pub fn num_fields(&self) -> usize {
201 self.field_builders.len()
202 }
203
204 #[inline]
207 pub fn append(&mut self, is_valid: bool) {
208 self.null_buffer_builder.append(is_valid);
209 }
210
211 #[inline]
213 pub fn append_null(&mut self) {
214 self.append(false)
215 }
216
217 #[inline]
219 pub fn append_nulls(&mut self, n: usize) {
220 self.null_buffer_builder.append_slice(&vec![false; n]);
221 }
222
223 pub fn finish(&mut self) -> StructArray {
225 self.validate_content();
226 if self.fields.is_empty() {
227 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
228 }
229
230 let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
231 let nulls = self.null_buffer_builder.finish();
232 StructArray::new(self.fields.clone(), arrays, nulls)
233 }
234
235 pub fn finish_cloned(&self) -> StructArray {
237 self.validate_content();
238
239 if self.fields.is_empty() {
240 return StructArray::new_empty_fields(
241 self.len(),
242 self.null_buffer_builder.finish_cloned(),
243 );
244 }
245
246 let arrays = self
247 .field_builders
248 .iter()
249 .map(|f| f.finish_cloned())
250 .collect();
251
252 let nulls = self.null_buffer_builder.finish_cloned();
253
254 StructArray::new(self.fields.clone(), arrays, nulls)
255 }
256
257 fn validate_content(&self) {
261 if self.fields.len() != self.field_builders.len() {
262 panic!("Number of fields is not equal to the number of field_builders.");
263 }
264 self.field_builders.iter().enumerate().for_each(|(idx, x)| {
265 if x.len() != self.len() {
266 let builder = SchemaBuilder::from(&self.fields);
267 let schema = builder.finish();
268
269 panic!("{}", format!(
270 "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
271 schema,
272 idx,
273 self.fields[idx].data_type(),
274 self.len(),
275 x.len()
276 ));
277 }
278 });
279 }
280
281 pub fn validity_slice(&self) -> Option<&[u8]> {
283 self.null_buffer_builder.as_slice()
284 }
285}
286
// Unit tests for `StructBuilder`: building arrays, reusing the builder after
// `finish`/`finish_cloned`, constructing child builders from field
// definitions, and the panics raised for inconsistent builder state.
#[cfg(test)]
mod tests {
    use std::any::type_name;

    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    use crate::{array::Array, types::ArrowDictionaryKeyType};

    /// Builds a 7-row struct of (Utf8, Int32) with nulls at both the struct
    /// and the child level and checks the resulting array data.
    #[test]
    fn test_struct_array_builder() {
        let string_builder = StringBuilder::new();
        let int_builder = Int32Builder::new();

        let fields = vec![
            Field::new("f1", DataType::Utf8, true),
            Field::new("f2", DataType::Int32, true),
        ];
        let field_builders = vec![
            Box::new(string_builder) as Box<dyn ArrayBuilder>,
            Box::new(int_builder) as Box<dyn ArrayBuilder>,
        ];

        let mut builder = StructBuilder::new(fields, field_builders);
        assert_eq!(2, builder.num_fields());

        // Child 0: "joe", null, null, "mark", null, null, "terry"
        let string_builder = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        string_builder.append_value("joe");
        string_builder.append_null();
        string_builder.append_null();
        string_builder.append_value("mark");
        string_builder.append_nulls(2);
        string_builder.append_value("terry");

        // Child 1: 1, 2, null, 4, null, null, 3
        let int_builder = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        int_builder.append_value(1);
        int_builder.append_value(2);
        int_builder.append_null();
        int_builder.append_value(4);
        int_builder.append_nulls(2);
        int_builder.append_value(3);

        // Struct validity: valid, valid, null, valid, null, null, valid
        builder.append(true);
        builder.append(true);
        builder.append_null();
        builder.append(true);

        builder.append_nulls(2);
        builder.append(true);

        let struct_data = builder.finish().into_data();

        assert_eq!(7, struct_data.len());
        assert_eq!(3, struct_data.null_count());
        // 75 == 0b0100_1011: bits 0, 1, 3 and 6 are set (the valid rows).
        assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());

        // 73 == 0b0100_1001: only rows 0, 3 and 6 hold a string.
        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(7)
            .null_bit_buffer(Some(Buffer::from(&[73_u8])))
            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
            .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
            .build()
            .unwrap();

        // NOTE(review): the values at the null positions (indices 2, 4, 5) differ
        // from what the builder was given; presumably equality ignores values in
        // null slots -- confirm against `ArrayData`'s equality rules.
        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(7)
            .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
            .build()
            .unwrap();

        assert_eq!(expected_string_data, struct_data.child_data()[0]);
        assert_eq!(expected_int_data, struct_data.child_data()[1]);

        assert!(struct_data.is_null(4));
        assert!(struct_data.is_null(5));
    }

    /// Checks that `finish` leaves the builder with length 0 and that the
    /// builder can be filled and finished a second time.
    #[test]
    fn test_struct_array_builder_finish() {
        let int_builder = Int32Builder::new();
        let bool_builder = BooleanBuilder::new();

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let field_builders = vec![
            Box::new(int_builder) as Box<dyn ArrayBuilder>,
            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
        ];

        let mut builder = StructBuilder::new(fields, field_builders);
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[
                false, true, false, true, false, true, false, true, false, true,
            ]);

        // One struct-level validity flag per child value appended above.
        for _ in 0..10 {
            builder.append(true);
        }

        assert_eq!(10, builder.len());

        let arr = builder.finish();

        assert_eq!(10, arr.len());
        assert_eq!(0, builder.len());

        // Second round on the same builder.
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[1, 3, 5, 7, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[false, true, false, true, false]);

        for _ in 0..5 {
            builder.append(true);
        }

        assert_eq!(5, builder.len());

        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, builder.len());
    }

    /// Checks that `make_builder` yields a `FixedSizeListBuilder` over an
    /// `Int32Builder` for a fixed-size list of Int32.
    #[test]
    fn test_build_fixed_size_list() {
        const LIST_LENGTH: i32 = 4;
        let fixed_size_list_dtype =
            DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
        let mut builder = make_builder(&fixed_size_list_dtype, 10);
        let builder = builder
            .as_any_mut()
            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>();
        match builder {
            Some(builder) => {
                assert_eq!(builder.value_length(), LIST_LENGTH);
                assert!(builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<Int32Builder>()
                    .is_some());
            }
            None => panic!("expected FixedSizeListBuilder, got a different builder type"),
        }
    }

    /// Checks that `finish_cloned` keeps the builder's contents (length stays
    /// 10) so that later appends accumulate, while `finish` empties it.
    #[test]
    fn test_struct_array_builder_finish_cloned() {
        let int_builder = Int32Builder::new();
        let bool_builder = BooleanBuilder::new();

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let field_builders = vec![
            Box::new(int_builder) as Box<dyn ArrayBuilder>,
            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
        ];

        let mut builder = StructBuilder::new(fields, field_builders);
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[
                false, true, false, true, false, true, false, true, false, true,
            ]);

        // One struct-level validity flag per child value appended above.
        for _ in 0..10 {
            builder.append(true);
        }

        assert_eq!(10, builder.len());

        let mut arr = builder.finish_cloned();

        assert_eq!(10, arr.len());
        assert_eq!(10, builder.len());

        // Five more rows on top of the ten that are still in the builder.
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(&[1, 3, 5, 7, 9]);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(&[false, true, false, true, false]);

        for _ in 0..5 {
            builder.append(true);
        }

        assert_eq!(15, builder.len());

        arr = builder.finish();

        assert_eq!(15, arr.len());
        assert_eq!(0, builder.len());
    }

    /// Checks that `from_fields` creates a child builder of the matching
    /// concrete type for each field, including a nested struct.
    #[test]
    fn test_struct_array_builder_from_schema() {
        let mut fields = vec![
            Field::new("f1", DataType::Float32, false),
            Field::new("f2", DataType::Utf8, false),
        ];
        let sub_fields = vec![
            Field::new("g1", DataType::Int32, false),
            Field::new("g2", DataType::Boolean, false),
        ];
        let struct_type = DataType::Struct(sub_fields.into());
        fields.push(Field::new("f3", struct_type, false));

        let mut builder = StructBuilder::from_fields(fields, 5);
        assert_eq!(3, builder.num_fields());
        assert!(builder.field_builder::<Float32Builder>(0).is_some());
        assert!(builder.field_builder::<StringBuilder>(1).is_some());
        assert!(builder.field_builder::<StructBuilder>(2).is_some());
    }

    /// Checks that parameterised data types (decimal precision/scale,
    /// timestamp time zone) are carried through to the built child arrays.
    #[test]
    fn test_datatype_properties() {
        let fields = Fields::from(vec![
            Field::new("f1", DataType::Decimal128(1, 2), false),
            Field::new(
                "f2",
                DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
                false,
            ),
        ]);
        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
        builder
            .field_builder::<Decimal128Builder>(0)
            .unwrap()
            .append_value(1);
        builder
            .field_builder::<TimestampMillisecondBuilder>(1)
            .unwrap()
            .append_value(1);
        builder.append(true);
        let array = builder.finish();

        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
        assert_eq!(array.column(0).data_type(), fields[0].data_type());
        assert_eq!(array.column(1).data_type(), fields[1].data_type());
    }

    // The next four tests run the shared dictionary helper below once per
    // signed integer key type.
    #[test]
    fn test_struct_array_builder_from_dictionary_type_int8_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int16_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int32_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int64_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64);
    }

    /// Shared body of the dictionary tests: builds a one-row struct with a
    /// single `Dictionary(key_type, Utf8)` field via `from_fields` and checks
    /// the data types and child length of the result.
    ///
    /// `K` must be the key type that corresponds to `key_type`.
    fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
        key_type: DataType,
    ) {
        let dict_field = Field::new(
            "f1",
            DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)),
            false,
        );
        let fields = vec![dict_field.clone()];
        let expected_dtype = DataType::Struct(fields.into());
        let cloned_dict_field = dict_field.clone();
        let expected_child_dtype = dict_field.data_type();
        let mut struct_builder = StructBuilder::from_fields(vec![cloned_dict_field], 5);
        let Some(dict_builder) = struct_builder.field_builder::<StringDictionaryBuilder<K>>(0)
        else {
            panic!(
                "Builder should be StringDictionaryBuilder<{}>",
                type_name::<K>()
            )
        };
        dict_builder.append_value("dict string");
        struct_builder.append(true);
        let array = struct_builder.finish();

        assert_eq!(array.data_type(), &expected_dtype);
        assert_eq!(array.column(0).data_type(), expected_child_dtype);
        assert_eq!(array.column(0).len(), 1);
    }

    /// `from_fields` is expected to panic for a dictionary with a UInt64 key.
    #[test]
    #[should_panic(
        expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
    )]
    fn test_struct_array_builder_from_schema_unsupported_type() {
        let fields = vec![
            Field::new("f1", DataType::UInt64, false),
            Field::new(
                "f2",
                DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
                false,
            ),
        ];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// `from_fields` is expected to panic for a dictionary with Int32 values.
    #[test]
    #[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
        let fields = vec![Field::new(
            "f1",
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32)),
            false,
        )];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// Asking for a child builder with the wrong concrete type yields `None`.
    #[test]
    fn test_struct_array_builder_field_builder_type_mismatch() {
        let int_builder = Int32Builder::with_capacity(10);

        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, field_builders);
        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
    }

    /// `finish` must panic when a child builder's length (here 1 for the
    /// Boolean child) differs from the struct builder's length (here 2).
    #[test]
    #[should_panic(
        expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
    )]
    fn test_struct_array_builder_unequal_field_builders_lengths() {
        let mut int_builder = Int32Builder::with_capacity(10);
        let mut bool_builder = BooleanBuilder::new();

        int_builder.append_value(1);
        int_builder.append_value(2);
        bool_builder.append_value(true);

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let field_builders = vec![
            Box::new(int_builder) as Box<dyn ArrayBuilder>,
            Box::new(bool_builder) as Box<dyn ArrayBuilder>,
        ];

        let mut builder = StructBuilder::new(fields, field_builders);
        builder.append(true);
        builder.append(true);
        builder.finish();
    }

    /// `finish` must panic when there are two fields but only one child builder.
    #[test]
    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
    fn test_struct_array_builder_unequal_field_field_builders() {
        let int_builder = Int32Builder::with_capacity(10);

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];

        let mut builder = StructBuilder::new(fields, field_builders);
        builder.finish();
    }

    /// `finish` must panic when the child builder's data type does not match
    /// the field: the field declares a "UTC" time zone, while the expected
    /// message shows that the child array has none.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)"
    )]
    fn test_struct_array_mismatch_builder() {
        let fields = vec![Field::new(
            "timestamp",
            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into())),
            false,
        )];

        let field_builders: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(TimestampNanosecondBuilder::new())];

        let mut sa = StructBuilder::new(fields, field_builders);
        sa.finish();
    }

    /// A struct without any fields still tracks length and validity, and
    /// `finish_cloned` and `finish` produce equal arrays.
    #[test]
    fn test_empty() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append(true);
        builder.append(false);

        let a1 = builder.finish_cloned();
        let a2 = builder.finish();
        assert_eq!(a1, a2);
        assert_eq!(a1.len(), 2);
        assert_eq!(a1.null_count(), 1);
        assert!(a1.is_valid(0));
        assert!(a1.is_null(1));
    }
}