1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
20use arrow_buffer::NullBufferBuilder;
21use arrow_buffer::{Buffer, OffsetBuffer};
22use arrow_schema::{Field, FieldRef};
23use std::any::Any;
24use std::sync::Arc;
25
26#[derive(Debug)]
88pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
89 offsets_builder: BufferBuilder<OffsetSize>,
90 null_buffer_builder: NullBufferBuilder,
91 values_builder: T,
92 field: Option<FieldRef>,
93}
94
95impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> {
96 fn default() -> Self {
97 Self::new(T::default())
98 }
99}
100
101impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
102 pub fn new(values_builder: T) -> Self {
104 let capacity = values_builder.len();
105 Self::with_capacity(values_builder, capacity)
106 }
107
108 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
111 let mut offsets_builder = BufferBuilder::<OffsetSize>::new(capacity + 1);
112 offsets_builder.append(OffsetSize::zero());
113 Self {
114 offsets_builder,
115 null_buffer_builder: NullBufferBuilder::new(capacity),
116 values_builder,
117 field: None,
118 }
119 }
120
121 pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
128 Self {
129 field: Some(field.into()),
130 ..self
131 }
132 }
133}
134
135impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
136 for GenericListBuilder<OffsetSize, T>
137where
138 T: 'static,
139{
140 fn as_any(&self) -> &dyn Any {
142 self
143 }
144
145 fn as_any_mut(&mut self) -> &mut dyn Any {
147 self
148 }
149
150 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
152 self
153 }
154
155 fn len(&self) -> usize {
157 self.null_buffer_builder.len()
158 }
159
160 fn finish(&mut self) -> ArrayRef {
162 Arc::new(self.finish())
163 }
164
165 fn finish_cloned(&self) -> ArrayRef {
167 Arc::new(self.finish_cloned())
168 }
169}
170
171impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
172where
173 T: 'static,
174{
175 pub fn values(&mut self) -> &mut T {
180 &mut self.values_builder
181 }
182
183 pub fn values_ref(&self) -> &T {
185 &self.values_builder
186 }
187
188 #[inline]
194 pub fn append(&mut self, is_valid: bool) {
195 self.offsets_builder.append(self.next_offset());
196 self.null_buffer_builder.append(is_valid);
197 }
198
199 #[inline]
205 fn next_offset(&self) -> OffsetSize {
206 OffsetSize::from_usize(self.values_builder.len()).unwrap()
207 }
208
209 #[inline]
256 pub fn append_value<I, V>(&mut self, i: I)
257 where
258 T: Extend<Option<V>>,
259 I: IntoIterator<Item = Option<V>>,
260 {
261 self.extend(std::iter::once(Some(i)))
262 }
263
264 #[inline]
268 pub fn append_null(&mut self) {
269 self.offsets_builder.append(self.next_offset());
270 self.null_buffer_builder.append_null();
271 }
272
273 #[inline]
275 pub fn append_nulls(&mut self, n: usize) {
276 let next_offset = self.next_offset();
277 self.offsets_builder.append_n(n, next_offset);
278 self.null_buffer_builder.append_n_nulls(n);
279 }
280
281 #[inline]
285 pub fn append_option<I, V>(&mut self, i: Option<I>)
286 where
287 T: Extend<Option<V>>,
288 I: IntoIterator<Item = Option<V>>,
289 {
290 match i {
291 Some(i) => self.append_value(i),
292 None => self.append_null(),
293 }
294 }
295
296 pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
298 let values = self.values_builder.finish();
299 let nulls = self.null_buffer_builder.finish();
300
301 let offsets = self.offsets_builder.finish();
302 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
304 self.offsets_builder.append(OffsetSize::zero());
305
306 let field = match &self.field {
307 Some(f) => f.clone(),
308 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
309 };
310
311 GenericListArray::new(field, offsets, values, nulls)
312 }
313
314 pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
316 let values = self.values_builder.finish_cloned();
317 let nulls = self.null_buffer_builder.finish_cloned();
318
319 let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
320 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
322
323 let field = match &self.field {
324 Some(f) => f.clone(),
325 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
326 };
327
328 GenericListArray::new(field, offsets, values, nulls)
329 }
330
331 pub fn offsets_slice(&self) -> &[OffsetSize] {
333 self.offsets_builder.as_slice()
334 }
335
336 pub fn validity_slice(&self) -> Option<&[u8]> {
338 self.null_buffer_builder.as_slice()
339 }
340}
341
342impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
343where
344 O: OffsetSizeTrait,
345 B: ArrayBuilder + Extend<E>,
346 V: IntoIterator<Item = E>,
347{
348 #[inline]
349 fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
350 for v in iter {
351 match v {
352 Some(elements) => {
353 self.values_builder.extend(elements);
354 self.append(true);
355 }
356 None => self.append(false),
357 }
358 }
359 }
360}
361
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{make_builder, Int32Builder, ListBuilder};
    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use crate::Int32Array;
    use arrow_schema::DataType;

    /// List builder whose child builder is type-erased.
    type BoxedListBuilder<O> = GenericListBuilder<O, Box<dyn ArrayBuilder>>;

    /// Builds `[[0, 1, 2], [3, 4, 5], [6, 7]]` and checks offsets, lengths and validity.
    fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], [3, 4, 5], [6, 7]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        let list_array = builder.finish();

        let list_values = list_array.values().as_primitive::<Int32Type>();
        assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as));
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2));
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }

    #[test]
    fn test_list_array_builder() {
        _test_generic_list_array_builder::<i32>()
    }

    #[test]
    fn test_large_list_array_builder() {
        _test_generic_list_array_builder::<i64>()
    }

    /// Builds `[[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]` and checks the
    /// null bookkeeping of both single and bulk null appends.
    fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_null();
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        builder.append_nulls(2);
        builder.values().append_value(8);
        builder.append(true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(7, list_array.len());
        assert_eq!(3, list_array.null_count());
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]);
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
        assert!(list_array.is_null(4));
        assert!(list_array.is_null(5));
    }

    #[test]
    fn test_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i32>()
    }

    #[test]
    fn test_large_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i64>()
    }

    /// `finish` must leave the builder empty and ready for reuse.
    #[test]
    fn test_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert!(builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert!(builder.is_empty());
    }

    /// `finish_cloned` must keep the builder contents so later rows extend the earlier ones.
    #[test]
    fn test_list_array_builder_finish_cloned() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish_cloned();
        assert_eq!(2, arr.len());
        assert!(!builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(3, arr.len());
        assert!(builder.is_empty());
    }

    /// Nested lists built with statically typed builders.
    #[test]
    fn test_list_list_array_builder() {
        let primitive_builder = Int32Builder::with_capacity(10);
        let values_builder = ListBuilder::new(primitive_builder);
        let mut builder = ListBuilder::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        builder.values().values().append_value(1);
        builder.values().values().append_value(2);
        builder.values().append(true);
        builder.values().values().append_value(3);
        builder.values().values().append_value(4);
        builder.values().append(true);
        builder.append(true);

        builder.values().values().append_value(5);
        builder.values().values().append_value(6);
        builder.values().values().append_value(7);
        builder.values().append(true);
        builder.values().append(false);
        builder.values().values().append_value(8);
        builder.values().append(true);
        builder.append(true);

        builder.append(false);

        builder.values().values().append_value(9);
        builder.values().values().append_value(10);
        builder.values().append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
        let l2 = l1.values().as_list::<i32>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    /// `Extend` maps `Some(row)` to a valid row and `None` to a null row.
    #[test]
    fn test_extend() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.extend([
            Some(vec![Some(1), Some(2), Some(7), None]),
            Some(vec![]),
            Some(vec![Some(4), Some(5)]),
            None,
        ]);

        let array = builder.finish();
        assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);
        assert!(array.is_null(3));
        let elements = array.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
        assert_eq!(elements.null_count(), 1);
        assert_eq!(elements.logical_null_count(), 1);
        assert!(elements.is_null(3));
    }

    /// A list builder over a boxed primitive builder obtained from `make_builder`.
    #[test]
    fn test_boxed_primitive_array_builder() {
        let values_builder = make_builder(&DataType::Int32, 5);
        let mut builder = ListBuilder::new(values_builder);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[1, 2, 3]);
        builder.append(true);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[4, 5, 6]);
        builder.append(true);

        let arr = builder.finish();
        assert_eq!(2, arr.len());

        let elements = arr.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn test_boxed_list_list_array_builder() {
        let values_builder = make_builder(
            &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder);
    }

    #[test]
    fn test_boxed_large_list_large_list_array_builder() {
        let values_builder = make_builder(
            &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder);
    }

    /// Downcasts the type-erased child of `outer` to the nested list builder.
    fn inner_list_builder<O: OffsetSizeTrait>(
        outer: &mut BoxedListBuilder<O>,
    ) -> &mut BoxedListBuilder<O> {
        outer
            .values()
            .as_any_mut()
            .downcast_mut::<BoxedListBuilder<O>>()
            .expect("should be an (Large)ListBuilder")
    }

    /// Appends `items` as one valid inner list to the nested builder of `outer`.
    fn append_inner_list<O: OffsetSizeTrait>(outer: &mut BoxedListBuilder<O>, items: &[i32]) {
        let inner = inner_list_builder(outer);
        let ints = inner
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        for &item in items {
            ints.append_value(item);
        }
        inner.append(true);
    }

    /// Nested lists built entirely through boxed, type-erased builders.
    fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>(
        values_builder: Box<dyn ArrayBuilder>,
    ) {
        let mut builder: BoxedListBuilder<O> = BoxedListBuilder::<O>::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        append_inner_list(&mut builder, &[1, 2]);
        append_inner_list(&mut builder, &[3, 4]);
        builder.append(true);

        append_inner_list(&mut builder, &[5, 6, 7]);
        inner_list_builder(&mut builder).append(false);
        append_inner_list(&mut builder, &[8]);
        builder.append(true);

        builder.append(false);

        append_inner_list(&mut builder, &[9, 10]);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as));
        let l2 = l1.values().as_list::<O>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as));

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    /// A field override must be used for the array type and must survive `finish`.
    #[test]
    fn test_with_field() {
        let field = Arc::new(Field::new("bar", DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), Some(2), Some(3)]);
        // A null list row adds no child values, so the non-nullable item field is not violated.
        builder.append_null();
        builder.append_value([Some(4)]);
        let array = builder.finish();
        assert_eq!(array.len(), 3);
        assert_eq!(array.data_type(), &DataType::List(field.clone()));

        builder.append_value([Some(4), Some(5)]);
        let array = builder.finish();
        assert_eq!(array.data_type(), &DataType::List(field));
        assert_eq!(array.len(), 1);
    }

    /// Null child values under a non-nullable item field must be rejected when finishing.
    #[test]
    #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
    fn test_checks_nullability() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), None]);
        builder.finish();
    }

    /// A field whose data type differs from the child values must be rejected when finishing.
    #[test]
    #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
    fn test_checks_data_type() {
        let field = Arc::new(Field::new_list_field(DataType::Int64, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1)]);
        builder.finish();
    }
}