1use crate::builder::*;
19use crate::StructArray;
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Fields, SchemaBuilder};
22use std::sync::Arc;
23
24pub struct StructBuilder {
103 fields: Fields,
104 field_builders: Vec<Box<dyn ArrayBuilder>>,
105 null_buffer_builder: NullBufferBuilder,
106}
107
108impl std::fmt::Debug for StructBuilder {
109 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110 f.debug_struct("StructBuilder")
111 .field("fields", &self.fields)
112 .field("bitmap_builder", &self.null_buffer_builder)
113 .field("len", &self.len())
114 .finish()
115 }
116}
117
118impl ArrayBuilder for StructBuilder {
119 fn len(&self) -> usize {
125 self.null_buffer_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137
138 fn as_any(&self) -> &dyn Any {
144 self
145 }
146
147 fn as_any_mut(&mut self) -> &mut dyn Any {
153 self
154 }
155
156 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
158 self
159 }
160}
161
162impl StructBuilder {
163 pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
165 Self {
166 field_builders,
167 fields: fields.into(),
168 null_buffer_builder: NullBufferBuilder::new(0),
169 }
170 }
171
172 pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self {
174 let fields = fields.into();
175 let mut builders = Vec::with_capacity(fields.len());
176 for field in &fields {
177 builders.push(make_builder(field.data_type(), capacity));
178 }
179 Self::new(fields, builders)
180 }
181
182 pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
186 self.field_builders[i].as_any_mut().downcast_mut::<T>()
187 }
188
189 pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] {
191 &self.field_builders
192 }
193
194 pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] {
196 &mut self.field_builders
197 }
198
199 pub fn num_fields(&self) -> usize {
201 self.field_builders.len()
202 }
203
204 #[inline]
207 pub fn append(&mut self, is_valid: bool) {
208 self.null_buffer_builder.append(is_valid);
209 }
210
211 #[inline]
213 pub fn append_null(&mut self) {
214 self.append(false)
215 }
216
217 pub fn finish(&mut self) -> StructArray {
219 self.validate_content();
220 if self.fields.is_empty() {
221 return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
222 }
223
224 let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
225 let nulls = self.null_buffer_builder.finish();
226 StructArray::new(self.fields.clone(), arrays, nulls)
227 }
228
229 pub fn finish_cloned(&self) -> StructArray {
231 self.validate_content();
232
233 if self.fields.is_empty() {
234 return StructArray::new_empty_fields(
235 self.len(),
236 self.null_buffer_builder.finish_cloned(),
237 );
238 }
239
240 let arrays = self
241 .field_builders
242 .iter()
243 .map(|f| f.finish_cloned())
244 .collect();
245
246 let nulls = self.null_buffer_builder.finish_cloned();
247
248 StructArray::new(self.fields.clone(), arrays, nulls)
249 }
250
251 fn validate_content(&self) {
255 if self.fields.len() != self.field_builders.len() {
256 panic!("Number of fields is not equal to the number of field_builders.");
257 }
258 self.field_builders.iter().enumerate().for_each(|(idx, x)| {
259 if x.len() != self.len() {
260 let builder = SchemaBuilder::from(&self.fields);
261 let schema = builder.finish();
262
263 panic!("{}", format!(
264 "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
265 schema,
266 idx,
267 self.fields[idx].data_type(),
268 self.len(),
269 x.len()
270 ));
271 }
272 });
273 }
274
275 pub fn validity_slice(&self) -> Option<&[u8]> {
277 self.null_buffer_builder.as_slice()
278 }
279}
280
#[cfg(test)]
mod tests {
    use std::any::type_name;

    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    use crate::{array::Array, types::ArrowDictionaryKeyType};

    /// Creates an empty struct builder with a non-nullable `Int32` column "f1"
    /// and a non-nullable `Boolean` column "f2".
    fn int32_boolean_builder() -> StructBuilder {
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![
            Box::new(Int32Builder::new()),
            Box::new(BooleanBuilder::new()),
        ];
        StructBuilder::new(fields, children)
    }

    /// Appends `ints` and `bools` to the two child builders, then one valid
    /// struct slot per element of `ints`.
    fn append_rows(builder: &mut StructBuilder, ints: &[i32], bools: &[bool]) {
        builder
            .field_builder::<Int32Builder>(0)
            .unwrap()
            .append_slice(ints);
        builder
            .field_builder::<BooleanBuilder>(1)
            .unwrap()
            .append_slice(bools);

        (0..ints.len()).for_each(|_| builder.append(true));
    }

    /// Builds a (Utf8, Int32) struct with nulls at both levels and checks the
    /// resulting validity bitmaps and child data.
    #[test]
    fn test_struct_array_builder() {
        let fields = vec![
            Field::new("f1", DataType::Utf8, true),
            Field::new("f2", DataType::Int32, true),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![
            Box::new(StringBuilder::new()),
            Box::new(Int32Builder::new()),
        ];

        let mut builder = StructBuilder::new(fields, children);
        assert_eq!(2, builder.num_fields());

        let names = builder
            .field_builder::<StringBuilder>(0)
            .expect("builder at field 0 should be string builder");
        names.append_value("joe");
        names.append_null();
        names.append_null();
        names.append_value("mark");

        let numbers = builder
            .field_builder::<Int32Builder>(1)
            .expect("builder at field 1 should be int builder");
        numbers.append_value(1);
        numbers.append_value(2);
        numbers.append_null();
        numbers.append_value(4);

        // Struct-level validity: valid, valid, null, valid.
        builder.append(true);
        builder.append(true);
        builder.append_null();
        builder.append(true);

        let struct_data = builder.finish().into_data();

        assert_eq!(4, struct_data.len());
        assert_eq!(1, struct_data.null_count());
        assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity());

        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7]))
            .add_buffer(Buffer::from_slice_ref(b"joemark"))
            .build()
            .unwrap();

        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(4)
            .null_bit_buffer(Some(Buffer::from_slice_ref([11_u8])))
            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4]))
            .build()
            .unwrap();

        let children = struct_data.child_data();
        assert_eq!(expected_string_data, children[0]);
        assert_eq!(expected_int_data, children[1]);
    }

    /// `finish` must reset the builder so it can be reused for a second array.
    #[test]
    fn test_struct_array_builder_finish() {
        let mut builder = int32_boolean_builder();

        append_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        let first = builder.finish();
        assert_eq!(10, first.len());
        assert_eq!(0, builder.len());

        append_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(5, builder.len());

        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    /// `make_builder` must produce a `FixedSizeListBuilder` with an `Int32`
    /// values builder for a fixed-size list of `Int32`.
    #[test]
    fn test_build_fixed_size_list() {
        const LIST_LENGTH: i32 = 4;
        let fixed_size_list_dtype =
            DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false);
        let mut boxed = make_builder(&fixed_size_list_dtype, 10);

        let Some(list_builder) = boxed
            .as_any_mut()
            .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>()
        else {
            panic!("expected FixedSizeListBuilder, got a different builder type")
        };

        assert_eq!(list_builder.value_length(), LIST_LENGTH);
        let values_are_int32 = list_builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .is_some();
        assert!(values_are_int32);
    }

    /// `finish_cloned` must leave the builder contents in place, so a later
    /// `finish` sees the rows from before and after the snapshot.
    #[test]
    fn test_struct_array_builder_finish_cloned() {
        let mut builder = int32_boolean_builder();

        append_rows(
            &mut builder,
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            &[
                false, true, false, true, false, true, false, true, false, true,
            ],
        );
        assert_eq!(10, builder.len());

        let snapshot = builder.finish_cloned();
        assert_eq!(10, snapshot.len());
        assert_eq!(10, builder.len());

        append_rows(
            &mut builder,
            &[1, 3, 5, 7, 9],
            &[false, true, false, true, false],
        );
        assert_eq!(15, builder.len());

        let finished = builder.finish();
        assert_eq!(15, finished.len());
        assert_eq!(0, builder.len());
    }

    /// `from_fields` must pick the matching child builder type for each field,
    /// including a nested struct.
    #[test]
    fn test_struct_array_builder_from_schema() {
        let nested_type = DataType::Struct(Fields::from(vec![
            Field::new("g1", DataType::Int32, false),
            Field::new("g2", DataType::Boolean, false),
        ]));
        let fields = vec![
            Field::new("f1", DataType::Float32, false),
            Field::new("f2", DataType::Utf8, false),
            Field::new("f3", nested_type, false),
        ];

        let mut builder = StructBuilder::from_fields(fields, 5);
        assert_eq!(3, builder.num_fields());
        assert!(builder.field_builder::<Float32Builder>(0).is_some());
        assert!(builder.field_builder::<StringBuilder>(1).is_some());
        assert!(builder.field_builder::<StructBuilder>(2).is_some());
    }

    /// Parameterised data types (decimal precision/scale, timestamp time zone)
    /// must be carried through to the finished array and its columns.
    #[test]
    fn test_datatype_properties() {
        let decimal_field = Field::new("f1", DataType::Decimal128(1, 2), false);
        let timestamp_field = Field::new(
            "f2",
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            false,
        );
        let fields = Fields::from(vec![decimal_field, timestamp_field]);

        let mut builder = StructBuilder::from_fields(fields.clone(), 1);
        builder
            .field_builder::<Decimal128Builder>(0)
            .unwrap()
            .append_value(1);
        builder
            .field_builder::<TimestampMillisecondBuilder>(1)
            .unwrap()
            .append_value(1);
        builder.append(true);
        let array = builder.finish();

        assert_eq!(array.data_type(), &DataType::Struct(fields.clone()));
        for (idx, field) in fields.iter().enumerate() {
            assert_eq!(array.column(idx).data_type(), field.data_type());
        }
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int8_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int16_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int32_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32);
    }

    #[test]
    fn test_struct_array_builder_from_dictionary_type_int64_key() {
        test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64);
    }

    /// Shared body of the dictionary tests: builds a one-column struct whose
    /// column is `Dictionary(key_type, Utf8)` and checks the resulting types.
    fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>(
        key_type: DataType,
    ) {
        let dict_type = DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8));
        let dict_field = Field::new("f1", dict_type, false);
        let expected_dtype = DataType::Struct(Fields::from(vec![dict_field.clone()]));
        let expected_child_dtype = dict_field.data_type();

        let mut struct_builder = StructBuilder::from_fields(vec![dict_field.clone()], 5);
        match struct_builder.field_builder::<StringDictionaryBuilder<K>>(0) {
            Some(dict_builder) => {
                dict_builder.append_value("dict string");
            }
            None => panic!(
                "Builder should be StringDictionaryBuilder<{}>",
                type_name::<K>()
            ),
        }
        struct_builder.append(true);
        let array = struct_builder.finish();

        assert_eq!(array.data_type(), &expected_dtype);
        let column = array.column(0);
        assert_eq!(column.data_type(), expected_child_dtype);
        assert_eq!(column.len(), 1);
    }

    #[test]
    #[should_panic(
        expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported"
    )]
    fn test_struct_array_builder_from_schema_unsupported_type() {
        let unsupported_dict =
            DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8));
        let fields = vec![
            Field::new("f1", DataType::UInt64, false),
            Field::new("f2", unsupported_dict, false),
        ];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    #[test]
    #[should_panic(expected = "Dictionary value type Int32 is not currently supported")]
    fn test_struct_array_builder_from_dict_with_unsupported_value_type() {
        let unsupported_dict =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
        let fields = vec![Field::new("f1", unsupported_dict, false)];

        let _ = StructBuilder::from_fields(fields, 5);
    }

    /// Asking for the wrong concrete builder type must yield `None`.
    #[test]
    fn test_struct_array_builder_field_builder_type_mismatch() {
        let fields = vec![Field::new("f1", DataType::Int32, false)];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(fields, children);
        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
    }

    #[test]
    #[should_panic(
        expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
    )]
    fn test_struct_array_builder_unequal_field_builders_lengths() {
        // Two ints but only one bool: the second child ends up one row short.
        let mut ints = Int32Builder::with_capacity(10);
        ints.append_value(1);
        ints.append_value(2);

        let mut bools = BooleanBuilder::new();
        bools.append_value(true);

        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(ints), Box::new(bools)];

        let mut builder = StructBuilder::new(fields, children);
        builder.append(true);
        builder.append(true);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
    fn test_struct_array_builder_unequal_field_field_builders() {
        // Two fields are declared but only one child builder is supplied.
        let fields = vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Boolean, false),
        ];
        let children: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(Int32Builder::with_capacity(10))];

        let mut builder = StructBuilder::new(fields, children);
        builder.finish();
    }

    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)"
    )]
    fn test_struct_array_mismatch_builder() {
        // The field declares a UTC time zone, the child builder carries none.
        let utc_timestamp = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into()));
        let fields = vec![Field::new("timestamp", utc_timestamp, false)];

        let children: Vec<Box<dyn ArrayBuilder>> =
            vec![Box::new(TimestampNanosecondBuilder::new())];

        let mut sa = StructBuilder::new(fields, children);
        sa.finish();
    }

    /// A struct without fields still tracks length and validity.
    #[test]
    fn test_empty() {
        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
        builder.append(true);
        builder.append(false);

        let cloned = builder.finish_cloned();
        let finished = builder.finish();
        assert_eq!(cloned, finished);
        assert_eq!(cloned.len(), 2);
        assert_eq!(cloned.null_count(), 1);
        assert!(cloned.is_valid(0));
        assert!(cloned.is_null(1));
    }
}