1use crate::builder::ArrayBuilder;
19use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
20use arrow_buffer::NullBufferBuilder;
21use arrow_buffer::{Buffer, OffsetBuffer};
22use arrow_schema::{Field, FieldRef};
23use std::any::Any;
24use std::sync::Arc;
25
26#[derive(Debug)]
88pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
89 offsets_builder: Vec<OffsetSize>,
90 null_buffer_builder: NullBufferBuilder,
91 values_builder: T,
92 field: Option<FieldRef>,
93}
94
95impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> {
96 fn default() -> Self {
97 Self::new(T::default())
98 }
99}
100
101impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
102 pub fn new(values_builder: T) -> Self {
104 let capacity = values_builder.len();
105 Self::with_capacity(values_builder, capacity)
106 }
107
108 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
111 let mut offsets_builder = Vec::with_capacity(capacity + 1);
112 offsets_builder.push(OffsetSize::zero());
113 Self {
114 offsets_builder,
115 null_buffer_builder: NullBufferBuilder::new(capacity),
116 values_builder,
117 field: None,
118 }
119 }
120
121 pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
128 Self {
129 field: Some(field.into()),
130 ..self
131 }
132 }
133}
134
135impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
136 for GenericListBuilder<OffsetSize, T>
137where
138 T: 'static,
139{
140 fn as_any(&self) -> &dyn Any {
142 self
143 }
144
145 fn as_any_mut(&mut self) -> &mut dyn Any {
147 self
148 }
149
150 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
152 self
153 }
154
155 fn len(&self) -> usize {
157 self.null_buffer_builder.len()
158 }
159
160 fn finish(&mut self) -> ArrayRef {
162 Arc::new(self.finish())
163 }
164
165 fn finish_cloned(&self) -> ArrayRef {
167 Arc::new(self.finish_cloned())
168 }
169}
170
171impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
172where
173 T: 'static,
174{
175 pub fn values(&mut self) -> &mut T {
180 &mut self.values_builder
181 }
182
183 pub fn values_ref(&self) -> &T {
185 &self.values_builder
186 }
187
188 #[inline]
194 pub fn append(&mut self, is_valid: bool) {
195 self.offsets_builder.push(self.next_offset());
196 self.null_buffer_builder.append(is_valid);
197 }
198
199 #[inline]
205 fn next_offset(&self) -> OffsetSize {
206 OffsetSize::from_usize(self.values_builder.len()).unwrap()
207 }
208
209 #[inline]
256 pub fn append_value<I, V>(&mut self, i: I)
257 where
258 T: Extend<Option<V>>,
259 I: IntoIterator<Item = Option<V>>,
260 {
261 self.extend(std::iter::once(Some(i)))
262 }
263
264 #[inline]
268 pub fn append_null(&mut self) {
269 self.offsets_builder.push(self.next_offset());
270 self.null_buffer_builder.append_null();
271 }
272
273 #[inline]
275 pub fn append_nulls(&mut self, n: usize) {
276 let next_offset = self.next_offset();
277 self.offsets_builder
278 .extend(std::iter::repeat_n(next_offset, n));
279 self.null_buffer_builder.append_n_nulls(n);
280 }
281
282 #[inline]
286 pub fn append_option<I, V>(&mut self, i: Option<I>)
287 where
288 T: Extend<Option<V>>,
289 I: IntoIterator<Item = Option<V>>,
290 {
291 match i {
292 Some(i) => self.append_value(i),
293 None => self.append_null(),
294 }
295 }
296
297 pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
299 let values = self.values_builder.finish();
300 let nulls = self.null_buffer_builder.finish();
301
302 let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
303 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
305 self.offsets_builder.push(OffsetSize::zero());
306
307 let field = match &self.field {
308 Some(f) => f.clone(),
309 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
310 };
311
312 GenericListArray::new(field, offsets, values, nulls)
313 }
314
315 pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
317 let values = self.values_builder.finish_cloned();
318 let nulls = self.null_buffer_builder.finish_cloned();
319
320 let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
321 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
323
324 let field = match &self.field {
325 Some(f) => f.clone(),
326 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
327 };
328
329 GenericListArray::new(field, offsets, values, nulls)
330 }
331
332 pub fn offsets_slice(&self) -> &[OffsetSize] {
334 self.offsets_builder.as_slice()
335 }
336
337 pub fn validity_slice(&self) -> Option<&[u8]> {
339 self.null_buffer_builder.as_slice()
340 }
341}
342
343impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
344where
345 O: OffsetSizeTrait,
346 B: ArrayBuilder + Extend<E>,
347 V: IntoIterator<Item = E>,
348{
349 #[inline]
350 fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
351 for v in iter {
352 match v {
353 Some(elements) => {
354 self.values_builder.extend(elements);
355 self.append(true);
356 }
357 None => self.append(false),
358 }
359 }
360 }
361}
362
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{make_builder, Int32Builder, ListBuilder};
    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use crate::Int32Array;
    use arrow_schema::DataType;

    fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], [3, 4, 5], [6, 7]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        let list_array = builder.finish();

        let list_values = list_array.values().as_primitive::<Int32Type>();
        assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as));
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2));
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }

    #[test]
    fn test_list_array_builder() {
        _test_generic_list_array_builder::<i32>()
    }

    #[test]
    fn test_large_list_array_builder() {
        _test_generic_list_array_builder::<i64>()
    }

    fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_null();
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        builder.append_nulls(2);
        builder.values().append_value(8);
        builder.append(true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(7, list_array.len());
        assert_eq!(3, list_array.null_count());
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]);
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
        assert!(list_array.is_null(4));
        assert!(list_array.is_null(5));
    }

    #[test]
    fn test_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i32>()
    }

    #[test]
    fn test_large_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i64>()
    }

    #[test]
    fn test_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish` must hand out the array and leave the builder empty and reusable.
        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert!(builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_array_builder_finish_cloned() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish_cloned` must not reset the builder.
        let mut arr = builder.finish_cloned();
        assert_eq!(2, arr.len());
        assert!(!builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(3, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_list_array_builder() {
        let primitive_builder = Int32Builder::with_capacity(10);
        let values_builder = ListBuilder::new(primitive_builder);
        let mut builder = ListBuilder::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        builder.values().values().append_value(1);
        builder.values().values().append_value(2);
        builder.values().append(true);
        builder.values().values().append_value(3);
        builder.values().values().append_value(4);
        builder.values().append(true);
        builder.append(true);

        builder.values().values().append_value(5);
        builder.values().values().append_value(6);
        builder.values().values().append_value(7);
        builder.values().append(true);
        builder.values().append(false);
        builder.values().values().append_value(8);
        builder.values().append(true);
        builder.append(true);

        builder.append(false);

        builder.values().values().append_value(9);
        builder.values().values().append_value(10);
        builder.values().append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
        let l2 = l1.values().as_list::<i32>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_extend() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.extend([
            Some(vec![Some(1), Some(2), Some(7), None]),
            Some(vec![]),
            Some(vec![Some(4), Some(5)]),
            None,
        ]);

        let array = builder.finish();
        assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);
        assert!(array.is_null(3));
        let elements = array.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
        assert_eq!(elements.null_count(), 1);
        assert_eq!(elements.logical_null_count(), 1);
        assert!(elements.is_null(3));
    }

    #[test]
    fn test_boxed_primitive_array_builder() {
        let values_builder = make_builder(&DataType::Int32, 5);
        let mut builder = ListBuilder::new(values_builder);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[1, 2, 3]);
        builder.append(true);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[4, 5, 6]);
        builder.append(true);

        let arr = builder.finish();
        assert_eq!(2, arr.len());

        let elements = arr.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn test_boxed_list_list_array_builder() {
        // Child builder for a List<List<Int32>>: a boxed ListBuilder over boxed Int32.
        let values_builder = make_builder(
            &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder);
    }

    #[test]
    fn test_boxed_large_list_large_list_array_builder() {
        // Child builder for a LargeList<LargeList<Int32>>.
        let values_builder = make_builder(
            &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder);
    }

    /// Downcasts the boxed child of `builder` to the inner (Large)ListBuilder.
    fn boxed_inner_list<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut GenericListBuilder<O, Box<dyn ArrayBuilder>> {
        builder
            .values()
            .as_any_mut()
            .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
            .expect("should be an (Large)ListBuilder")
    }

    /// Downcasts the boxed grandchild of `builder` to the innermost Int32Builder.
    fn boxed_inner_ints<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut Int32Builder {
        boxed_inner_list(builder)
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
    }

    /// Appends `values` one by one to the innermost builder, then closes them
    /// as one valid inner list.
    fn append_boxed_inner_list<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
        values: &[i32],
    ) {
        for &value in values {
            boxed_inner_ints(builder).append_value(value);
        }
        boxed_inner_list(builder).append(true);
    }

    fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>(
        values_builder: Box<dyn ArrayBuilder>,
    ) {
        let mut builder: GenericListBuilder<O, Box<dyn ArrayBuilder>> =
            GenericListBuilder::<O, Box<dyn ArrayBuilder>>::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        append_boxed_inner_list(&mut builder, &[1, 2]);
        append_boxed_inner_list(&mut builder, &[3, 4]);
        builder.append(true);

        append_boxed_inner_list(&mut builder, &[5, 6, 7]);
        boxed_inner_list(&mut builder).append(false);
        append_boxed_inner_list(&mut builder, &[8]);
        builder.append(true);

        builder.append(false);

        append_boxed_inner_list(&mut builder, &[9, 10]);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as));
        let l2 = l1.values().as_list::<O>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as));

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_with_field() {
        let field = Arc::new(Field::new("bar", DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), Some(2), Some(3)]);
        // A null list slot is accepted even though the child field is non-nullable.
        builder.append_null();
        builder.append_value([Some(4)]);
        let array = builder.finish();
        assert_eq!(array.len(), 3);
        assert_eq!(array.data_type(), &DataType::List(field.clone()));

        // The field override must survive `finish` and apply to the next array too.
        builder.append_value([Some(4), Some(5)]);
        let array = builder.finish();
        assert_eq!(array.data_type(), &DataType::List(field));
        assert_eq!(array.len(), 1);
    }

    #[test]
    #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
    fn test_checks_nullability() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), None]);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
    fn test_checks_data_type() {
        let field = Arc::new(Field::new_list_field(DataType::Int64, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1)]);
        builder.finish();
    }
}