1use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
19use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait:
41 ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
42{
43 const IS_LARGE: bool;
45 const PREFIX: &'static str;
47 const MAX_OFFSET: usize;
49}
50
51impl OffsetSizeTrait for i32 {
52 const IS_LARGE: bool = false;
53 const PREFIX: &'static str = "";
54 const MAX_OFFSET: usize = i32::MAX as usize;
55}
56
57impl OffsetSizeTrait for i64 {
58 const IS_LARGE: bool = true;
59 const PREFIX: &'static str = "Large";
60 const MAX_OFFSET: usize = i64::MAX as usize;
61}
62
63pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
172 data_type: DataType,
173 nulls: Option<NullBuffer>,
174 values: ArrayRef,
175 value_offsets: OffsetBuffer<OffsetSize>,
176}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
423 }
424
425 pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
445 where
446 B: ArrayBuilder + Default + Extend<Option<T>>,
447 P: IntoIterator<Item = Option<T>>,
448 I: IntoIterator<Item = Option<P>>,
449 {
450 let iter = iter.into_iter();
451 let size_hint = iter.size_hint().0;
452 let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint);
453
454 for i in iter {
455 match i {
456 Some(p) => {
457 builder.values().extend(p);
458 builder.append(true);
459 }
460 None => builder.append(false),
461 }
462 }
463 builder.finish()
464 }
465}
466
467impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
468 fn from(data: ArrayData) -> Self {
469 Self::try_new_from_array_data(data)
470 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
471 }
472}
473
474impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
475 fn from(array: GenericListArray<OffsetSize>) -> Self {
476 let len = array.len();
477 let builder = ArrayDataBuilder::new(array.data_type)
478 .len(len)
479 .nulls(array.nulls)
480 .buffers(vec![array.value_offsets.into_inner().into_inner()])
481 .child_data(vec![array.values.to_data()]);
482
483 unsafe { builder.build_unchecked() }
484 }
485}
486
487impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
488 fn from(value: FixedSizeListArray) -> Self {
489 let (field, size) = match value.data_type() {
490 DataType::FixedSizeList(f, size) => (f, *size as usize),
491 _ => unreachable!(),
492 };
493
494 let offsets = OffsetBuffer::from_repeated_length(size, value.len());
495
496 Self {
497 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
498 nulls: value.nulls().cloned(),
499 values: value.values().clone(),
500 value_offsets: offsets,
501 }
502 }
503}
504
505impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
506 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
507 let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
508
509 if buffers.len() != 1 {
510 return Err(ArrowError::InvalidArgumentError(format!(
511 "ListArray data should contain a single buffer only (value offsets), had {}",
512 buffers.len()
513 )));
514 }
515 let buffer = buffers.pop().expect("checked above");
516
517 if child_data.len() != 1 {
518 return Err(ArrowError::InvalidArgumentError(format!(
519 "ListArray should contain a single child array (values array), had {}",
520 child_data.len()
521 )));
522 }
523
524 let values = child_data.pop().expect("checked above");
525
526 if let Some(child_data_type) = Self::get_type(&data_type) {
527 if values.data_type() != child_data_type {
528 return Err(ArrowError::InvalidArgumentError(format!(
529 "[Large]ListArray's child datatype {:?} does not \
530 correspond to the List's datatype {:?}",
531 values.data_type(),
532 child_data_type
533 )));
534 }
535 } else {
536 return Err(ArrowError::InvalidArgumentError(format!(
537 "[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
538 )));
539 }
540
541 let values = make_array(values);
542 let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
545
546 Ok(Self {
547 data_type,
548 nulls,
549 values,
550 value_offsets,
551 })
552 }
553}
554
555unsafe impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
557 fn as_any(&self) -> &dyn Any {
558 self
559 }
560
561 fn to_data(&self) -> ArrayData {
562 self.clone().into()
563 }
564
565 fn into_data(self) -> ArrayData {
566 self.into()
567 }
568
569 fn data_type(&self) -> &DataType {
570 &self.data_type
571 }
572
573 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
574 Arc::new(self.slice(offset, length))
575 }
576
577 fn len(&self) -> usize {
578 self.value_offsets.len() - 1
579 }
580
581 fn is_empty(&self) -> bool {
582 self.value_offsets.len() <= 1
583 }
584
585 fn shrink_to_fit(&mut self) {
586 if let Some(nulls) = &mut self.nulls {
587 nulls.shrink_to_fit();
588 }
589 self.values.shrink_to_fit();
590 self.value_offsets.shrink_to_fit();
591 }
592
593 fn offset(&self) -> usize {
594 0
595 }
596
597 fn nulls(&self) -> Option<&NullBuffer> {
598 self.nulls.as_ref()
599 }
600
601 fn logical_null_count(&self) -> usize {
602 self.null_count()
604 }
605
606 fn get_buffer_memory_size(&self) -> usize {
607 let mut size = self.values.get_buffer_memory_size();
608 size += self.value_offsets.inner().inner().capacity();
609 if let Some(n) = self.nulls.as_ref() {
610 size += n.buffer().capacity();
611 }
612 size
613 }
614
615 fn get_array_memory_size(&self) -> usize {
616 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
617 size += self.value_offsets.inner().inner().capacity();
618 if let Some(n) = self.nulls.as_ref() {
619 size += n.buffer().capacity();
620 }
621 size
622 }
623
624 #[cfg(feature = "pool")]
625 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
626 self.value_offsets.claim(pool);
627 self.values.claim(pool);
628 if let Some(nulls) = &self.nulls {
629 nulls.claim(pool);
630 }
631 }
632}
633
634impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for GenericListArray<OffsetSize> {
635 fn values(&self) -> &ArrayRef {
636 self.values()
637 }
638
639 fn element_range(&self, index: usize) -> std::ops::Range<usize> {
640 let offsets = self.offsets();
641 let start = offsets[index].as_usize();
642 let end = offsets[index + 1].as_usize();
643 start..end
644 }
645}
646
647impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
648 type Item = ArrayRef;
649
650 fn value(&self, index: usize) -> Self::Item {
651 GenericListArray::value(self, index)
652 }
653
654 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
655 GenericListArray::value(self, index)
656 }
657}
658
659impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
660 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
661 let prefix = OffsetSize::PREFIX;
662
663 write!(f, "{prefix}ListArray\n[\n")?;
664 print_long_array(self, f, |array, index, f| {
665 std::fmt::Debug::fmt(&array.value(index), f)
666 })?;
667 write!(f, "]")
668 }
669}
670
/// A [`GenericListArray`] that uses `i32` offsets.
pub type ListArray = GenericListArray<i32>;
675
/// A [`GenericListArray`] that uses `i64` offsets.
pub type LargeListArray = GenericListArray<i64>;
680
681#[cfg(test)]
682mod tests {
683 use super::*;
684 use crate::builder::{
685 BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder,
686 StringDictionaryBuilder, UnionBuilder,
687 };
688 use crate::cast::AsArray;
689 use crate::types::{Int8Type, Int32Type};
690 use crate::{
691 BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray,
692 };
693 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
694 use arrow_schema::Field;
695
696 fn create_from_buffers() -> ListArray {
697 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
699 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
700 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
701 ListArray::new(field, offsets, Arc::new(values), None)
702 }
703
704 #[test]
705 fn test_from_iter_primitive() {
706 let data = vec![
707 Some(vec![Some(0), Some(1), Some(2)]),
708 Some(vec![Some(3), Some(4), Some(5)]),
709 Some(vec![Some(6), Some(7)]),
710 ];
711 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
712
713 let another = create_from_buffers();
714 assert_eq!(list_array, another)
715 }
716
717 #[test]
718 fn test_empty_list_array() {
719 let value_data = ArrayData::builder(DataType::Int32)
721 .len(0)
722 .add_buffer(Buffer::from([]))
723 .build()
724 .unwrap();
725
726 let value_offsets = Buffer::from([]);
728
729 let list_data_type =
731 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
732 let list_data = ArrayData::builder(list_data_type)
733 .len(0)
734 .add_buffer(value_offsets)
735 .add_child_data(value_data)
736 .build()
737 .unwrap();
738
739 let list_array = ListArray::from(list_data);
740 assert_eq!(list_array.len(), 0)
741 }
742
743 #[test]
744 fn test_list_array() {
745 let value_data = ArrayData::builder(DataType::Int32)
747 .len(8)
748 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
749 .build()
750 .unwrap();
751
752 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
755
756 let list_data_type =
758 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
759 let list_data = ArrayData::builder(list_data_type.clone())
760 .len(3)
761 .add_buffer(value_offsets.clone())
762 .add_child_data(value_data.clone())
763 .build()
764 .unwrap();
765 let list_array = ListArray::from(list_data);
766
767 let values = list_array.values();
768 assert_eq!(value_data, values.to_data());
769 assert_eq!(DataType::Int32, list_array.value_type());
770 assert_eq!(3, list_array.len());
771 assert_eq!(0, list_array.null_count());
772 assert_eq!(6, list_array.value_offsets()[2]);
773 assert_eq!(2, list_array.value_length(2));
774 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
775 assert_eq!(
776 0,
777 unsafe { list_array.value_unchecked(0) }
778 .as_primitive::<Int32Type>()
779 .value(0)
780 );
781 for i in 0..3 {
782 assert!(list_array.is_valid(i));
783 assert!(!list_array.is_null(i));
784 }
785
786 let list_data = ArrayData::builder(list_data_type)
789 .len(2)
790 .offset(1)
791 .add_buffer(value_offsets)
792 .add_child_data(value_data.clone())
793 .build()
794 .unwrap();
795 let list_array = ListArray::from(list_data);
796
797 let values = list_array.values();
798 assert_eq!(value_data, values.to_data());
799 assert_eq!(DataType::Int32, list_array.value_type());
800 assert_eq!(2, list_array.len());
801 assert_eq!(0, list_array.null_count());
802 assert_eq!(6, list_array.value_offsets()[1]);
803 assert_eq!(2, list_array.value_length(1));
804 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
805 assert_eq!(
806 3,
807 unsafe { list_array.value_unchecked(0) }
808 .as_primitive::<Int32Type>()
809 .value(0)
810 );
811 }
812
813 #[test]
814 fn test_large_list_array() {
815 let value_data = ArrayData::builder(DataType::Int32)
817 .len(8)
818 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
819 .build()
820 .unwrap();
821
822 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
825
826 let list_data_type = DataType::new_large_list(DataType::Int32, false);
828 let list_data = ArrayData::builder(list_data_type.clone())
829 .len(3)
830 .add_buffer(value_offsets.clone())
831 .add_child_data(value_data.clone())
832 .build()
833 .unwrap();
834 let list_array = LargeListArray::from(list_data);
835
836 let values = list_array.values();
837 assert_eq!(value_data, values.to_data());
838 assert_eq!(DataType::Int32, list_array.value_type());
839 assert_eq!(3, list_array.len());
840 assert_eq!(0, list_array.null_count());
841 assert_eq!(6, list_array.value_offsets()[2]);
842 assert_eq!(2, list_array.value_length(2));
843 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
844 assert_eq!(
845 0,
846 unsafe { list_array.value_unchecked(0) }
847 .as_primitive::<Int32Type>()
848 .value(0)
849 );
850 for i in 0..3 {
851 assert!(list_array.is_valid(i));
852 assert!(!list_array.is_null(i));
853 }
854
855 let list_data = ArrayData::builder(list_data_type)
858 .len(2)
859 .offset(1)
860 .add_buffer(value_offsets)
861 .add_child_data(value_data.clone())
862 .build()
863 .unwrap();
864 let list_array = LargeListArray::from(list_data);
865
866 let values = list_array.values();
867 assert_eq!(value_data, values.to_data());
868 assert_eq!(DataType::Int32, list_array.value_type());
869 assert_eq!(2, list_array.len());
870 assert_eq!(0, list_array.null_count());
871 assert_eq!(6, list_array.value_offsets()[1]);
872 assert_eq!(2, list_array.value_length(1));
873 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
874 assert_eq!(
875 3,
876 unsafe { list_array.value_unchecked(0) }
877 .as_primitive::<Int32Type>()
878 .value(0)
879 );
880 }
881
882 #[test]
883 fn test_list_array_slice() {
884 let value_data = ArrayData::builder(DataType::Int32)
886 .len(10)
887 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
888 .build()
889 .unwrap();
890
891 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
894 let mut null_bits: [u8; 2] = [0; 2];
896 bit_util::set_bit(&mut null_bits, 0);
897 bit_util::set_bit(&mut null_bits, 3);
898 bit_util::set_bit(&mut null_bits, 4);
899 bit_util::set_bit(&mut null_bits, 6);
900 bit_util::set_bit(&mut null_bits, 8);
901
902 let list_data_type =
904 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
905 let list_data = ArrayData::builder(list_data_type)
906 .len(9)
907 .add_buffer(value_offsets)
908 .add_child_data(value_data.clone())
909 .null_bit_buffer(Some(Buffer::from(null_bits)))
910 .build()
911 .unwrap();
912 let list_array = ListArray::from(list_data);
913
914 let values = list_array.values();
915 assert_eq!(value_data, values.to_data());
916 assert_eq!(DataType::Int32, list_array.value_type());
917 assert_eq!(9, list_array.len());
918 assert_eq!(4, list_array.null_count());
919 assert_eq!(2, list_array.value_offsets()[3]);
920 assert_eq!(2, list_array.value_length(3));
921
922 let sliced_array = list_array.slice(1, 6);
923 assert_eq!(6, sliced_array.len());
924 assert_eq!(3, sliced_array.null_count());
925
926 for i in 0..sliced_array.len() {
927 if bit_util::get_bit(&null_bits, 1 + i) {
928 assert!(sliced_array.is_valid(i));
929 } else {
930 assert!(sliced_array.is_null(i));
931 }
932 }
933
934 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
936 assert_eq!(2, sliced_list_array.value_offsets()[2]);
937 assert_eq!(2, sliced_list_array.value_length(2));
938 assert_eq!(4, sliced_list_array.value_offsets()[3]);
939 assert_eq!(2, sliced_list_array.value_length(3));
940 assert_eq!(6, sliced_list_array.value_offsets()[5]);
941 assert_eq!(3, sliced_list_array.value_length(5));
942 }
943
944 #[test]
945 fn test_large_list_array_slice() {
946 let value_data = ArrayData::builder(DataType::Int32)
948 .len(10)
949 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
950 .build()
951 .unwrap();
952
953 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
956 let mut null_bits: [u8; 2] = [0; 2];
958 bit_util::set_bit(&mut null_bits, 0);
959 bit_util::set_bit(&mut null_bits, 3);
960 bit_util::set_bit(&mut null_bits, 4);
961 bit_util::set_bit(&mut null_bits, 6);
962 bit_util::set_bit(&mut null_bits, 8);
963
964 let list_data_type = DataType::new_large_list(DataType::Int32, false);
966 let list_data = ArrayData::builder(list_data_type)
967 .len(9)
968 .add_buffer(value_offsets)
969 .add_child_data(value_data.clone())
970 .null_bit_buffer(Some(Buffer::from(null_bits)))
971 .build()
972 .unwrap();
973 let list_array = LargeListArray::from(list_data);
974
975 let values = list_array.values();
976 assert_eq!(value_data, values.to_data());
977 assert_eq!(DataType::Int32, list_array.value_type());
978 assert_eq!(9, list_array.len());
979 assert_eq!(4, list_array.null_count());
980 assert_eq!(2, list_array.value_offsets()[3]);
981 assert_eq!(2, list_array.value_length(3));
982
983 let sliced_array = list_array.slice(1, 6);
984 assert_eq!(6, sliced_array.len());
985 assert_eq!(3, sliced_array.null_count());
986
987 for i in 0..sliced_array.len() {
988 if bit_util::get_bit(&null_bits, 1 + i) {
989 assert!(sliced_array.is_valid(i));
990 } else {
991 assert!(sliced_array.is_null(i));
992 }
993 }
994
995 let sliced_list_array = sliced_array
997 .as_any()
998 .downcast_ref::<LargeListArray>()
999 .unwrap();
1000 assert_eq!(2, sliced_list_array.value_offsets()[2]);
1001 assert_eq!(2, sliced_list_array.value_length(2));
1002 assert_eq!(4, sliced_list_array.value_offsets()[3]);
1003 assert_eq!(2, sliced_list_array.value_length(3));
1004 assert_eq!(6, sliced_list_array.value_offsets()[5]);
1005 assert_eq!(3, sliced_list_array.value_length(5));
1006 }
1007
1008 #[test]
1009 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
1010 fn test_list_array_index_out_of_bound() {
1011 let value_data = ArrayData::builder(DataType::Int32)
1013 .len(10)
1014 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
1015 .build()
1016 .unwrap();
1017
1018 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
1021 let mut null_bits: [u8; 2] = [0; 2];
1023 bit_util::set_bit(&mut null_bits, 0);
1024 bit_util::set_bit(&mut null_bits, 3);
1025 bit_util::set_bit(&mut null_bits, 4);
1026 bit_util::set_bit(&mut null_bits, 6);
1027 bit_util::set_bit(&mut null_bits, 8);
1028
1029 let list_data_type = DataType::new_large_list(DataType::Int32, false);
1031 let list_data = ArrayData::builder(list_data_type)
1032 .len(9)
1033 .add_buffer(value_offsets)
1034 .add_child_data(value_data)
1035 .null_bit_buffer(Some(Buffer::from(null_bits)))
1036 .build()
1037 .unwrap();
1038 let list_array = LargeListArray::from(list_data);
1039 assert_eq!(9, list_array.len());
1040
1041 list_array.value(10);
1042 }
1043 #[test]
1044 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
1045 #[cfg(not(feature = "force_validate"))]
1048 fn test_list_array_invalid_buffer_len() {
1049 let value_data = unsafe {
1050 ArrayData::builder(DataType::Int32)
1051 .len(8)
1052 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1053 .build_unchecked()
1054 };
1055 let list_data_type =
1056 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1057 let list_data = unsafe {
1058 ArrayData::builder(list_data_type)
1059 .len(3)
1060 .add_child_data(value_data)
1061 .build_unchecked()
1062 };
1063 drop(ListArray::from(list_data));
1064 }
1065
1066 #[test]
1067 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1068 #[cfg(not(feature = "force_validate"))]
1071 fn test_list_array_invalid_child_array_len() {
1072 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1073 let list_data_type =
1074 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1075 let list_data = unsafe {
1076 ArrayData::builder(list_data_type)
1077 .len(3)
1078 .add_buffer(value_offsets)
1079 .build_unchecked()
1080 };
1081 drop(ListArray::from(list_data));
1082 }
1083
1084 #[test]
1085 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1086 fn test_from_array_data_validation() {
1087 let mut builder = ListBuilder::new(Int32Builder::new());
1088 builder.values().append_value(1);
1089 builder.append(true);
1090 let array = builder.finish();
1091 let _ = LargeListArray::from(array.into_data());
1092 }
1093
1094 #[test]
1095 fn test_list_array_offsets_need_not_start_at_zero() {
1096 let value_data = ArrayData::builder(DataType::Int32)
1097 .len(8)
1098 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1099 .build()
1100 .unwrap();
1101
1102 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1103
1104 let list_data_type =
1105 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1106 let list_data = ArrayData::builder(list_data_type)
1107 .len(3)
1108 .add_buffer(value_offsets)
1109 .add_child_data(value_data)
1110 .build()
1111 .unwrap();
1112
1113 let list_array = ListArray::from(list_data);
1114 assert_eq!(list_array.value_length(0), 0);
1115 assert_eq!(list_array.value_length(1), 3);
1116 assert_eq!(list_array.value_length(2), 2);
1117 }
1118
1119 #[test]
1120 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1121 #[cfg(not(feature = "force_validate"))]
1124 fn test_primitive_array_alignment() {
1125 let buf = Buffer::from_slice_ref([0_u64]);
1126 let buf2 = buf.slice(1);
1127 let array_data = unsafe {
1128 ArrayData::builder(DataType::Int32)
1129 .add_buffer(buf2)
1130 .build_unchecked()
1131 };
1132 drop(Int32Array::from(array_data));
1133 }
1134
1135 #[test]
1136 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1137 #[cfg(not(feature = "force_validate"))]
1140 fn test_list_array_alignment() {
1141 let buf = Buffer::from_slice_ref([0_u64]);
1142 let buf2 = buf.slice(1);
1143
1144 let values: [i32; 8] = [0; 8];
1145 let value_data = unsafe {
1146 ArrayData::builder(DataType::Int32)
1147 .add_buffer(Buffer::from_slice_ref(values))
1148 .build_unchecked()
1149 };
1150
1151 let list_data_type =
1152 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1153 let list_data = unsafe {
1154 ArrayData::builder(list_data_type)
1155 .add_buffer(buf2)
1156 .add_child_data(value_data)
1157 .build_unchecked()
1158 };
1159 drop(ListArray::from(list_data));
1160 }
1161
1162 #[test]
1163 fn list_array_equality() {
1164 fn do_comparison(
1166 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1167 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1168 should_equal: bool,
1169 ) {
1170 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1171 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1172 assert_eq!(lhs == rhs, should_equal);
1173
1174 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1175 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1176 assert_eq!(lhs == rhs, should_equal);
1177 }
1178
1179 do_comparison(
1180 vec![
1181 Some(vec![Some(0), Some(1), Some(2)]),
1182 None,
1183 Some(vec![Some(3), None, Some(5)]),
1184 Some(vec![Some(6), Some(7)]),
1185 ],
1186 vec![
1187 Some(vec![Some(0), Some(1), Some(2)]),
1188 None,
1189 Some(vec![Some(3), None, Some(5)]),
1190 Some(vec![Some(6), Some(7)]),
1191 ],
1192 true,
1193 );
1194
1195 do_comparison(
1196 vec![
1197 None,
1198 None,
1199 Some(vec![Some(3), None, Some(5)]),
1200 Some(vec![Some(6), Some(7)]),
1201 ],
1202 vec![
1203 Some(vec![Some(0), Some(1), Some(2)]),
1204 None,
1205 Some(vec![Some(3), None, Some(5)]),
1206 Some(vec![Some(6), Some(7)]),
1207 ],
1208 false,
1209 );
1210
1211 do_comparison(
1212 vec![
1213 None,
1214 None,
1215 Some(vec![Some(3), None, Some(5)]),
1216 Some(vec![Some(6), Some(7)]),
1217 ],
1218 vec![
1219 None,
1220 None,
1221 Some(vec![Some(3), None, Some(5)]),
1222 Some(vec![Some(0), Some(0)]),
1223 ],
1224 false,
1225 );
1226
1227 do_comparison(
1228 vec![None, None, Some(vec![Some(1)])],
1229 vec![None, None, Some(vec![Some(2)])],
1230 false,
1231 );
1232 }
1233
1234 #[test]
1235 fn test_empty_offsets() {
1236 let f = Arc::new(Field::new("element", DataType::Int32, true));
1237 let string = ListArray::from(
1238 ArrayData::builder(DataType::List(f.clone()))
1239 .buffers(vec![Buffer::from(&[])])
1240 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1241 .build()
1242 .unwrap(),
1243 );
1244 assert_eq!(string.value_offsets(), &[0]);
1245 let string = LargeListArray::from(
1246 ArrayData::builder(DataType::LargeList(f))
1247 .buffers(vec![Buffer::from(&[])])
1248 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1249 .build()
1250 .unwrap(),
1251 );
1252 assert_eq!(string.len(), 0);
1253 assert_eq!(string.value_offsets(), &[0]);
1254 }
1255
1256 #[test]
1257 fn test_try_new() {
1258 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1259 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1260 let values = Arc::new(values) as ArrayRef;
1261
1262 let field = Arc::new(Field::new("element", DataType::Int32, false));
1263 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1264
1265 let nulls = NullBuffer::new_null(3);
1266 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1267
1268 let nulls = NullBuffer::new_null(3);
1269 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1270 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1271 .unwrap_err();
1272
1273 assert_eq!(
1274 err.to_string(),
1275 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1276 );
1277
1278 let field = Arc::new(Field::new("element", DataType::Int64, false));
1279 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1280 .unwrap_err();
1281
1282 assert_eq!(
1283 err.to_string(),
1284 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1285 );
1286
1287 let nulls = NullBuffer::new_null(7);
1288 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1289 let values = Arc::new(values);
1290
1291 let err =
1292 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1293
1294 assert_eq!(
1295 err.to_string(),
1296 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1297 );
1298
1299 let field = Arc::new(Field::new("element", DataType::Int64, true));
1300 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1301
1302 let values = Int64Array::new(vec![0; 2].into(), None);
1303 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1304
1305 assert_eq!(
1306 err.to_string(),
1307 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1308 );
1309 }
1310
1311 #[test]
1312 fn test_from_fixed_size_list() {
1313 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1314 builder.values().append_slice(&[1, 2, 3]);
1315 builder.append(true);
1316 builder.values().append_slice(&[0, 0, 0]);
1317 builder.append(false);
1318 builder.values().append_slice(&[4, 5, 6]);
1319 builder.append(true);
1320 let list: ListArray = builder.finish().into();
1321
1322 let values: Vec<_> = list
1323 .iter()
1324 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1325 .collect();
1326 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1327 }
1328
1329 #[test]
1330 fn test_nullable_union() {
1331 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1332 let mut builder = UnionBuilder::new_dense();
1333 builder.append::<Int32Type>("a", 1).unwrap();
1334 builder.append::<Int32Type>("b", 2).unwrap();
1335 builder.append::<Int32Type>("b", 3).unwrap();
1336 builder.append::<Int32Type>("a", 4).unwrap();
1337 builder.append::<Int32Type>("a", 5).unwrap();
1338 let values = builder.build().unwrap();
1339 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1340 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1341 }
1342
1343 #[test]
1344 fn test_list_new_null_len() {
1345 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1346 let array = ListArray::new_null(field, 5);
1347 assert_eq!(array.len(), 5);
1348 }
1349
1350 #[test]
1351 fn test_list_from_iter_i32() {
1352 let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![
1353 None,
1354 Some(vec![Some(1), None, Some(2)]),
1355 ]);
1356 let expected_offsets = &[0, 0, 3];
1357 let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
1358 assert_eq!(array.value_offsets(), expected_offsets);
1359 assert_eq!(array.values(), &expected_values);
1360 }
1361
1362 #[test]
1363 fn test_list_from_iter_bool() {
1364 let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(vec![
1365 Some(vec![None, Some(false), Some(true)]),
1366 None,
1367 ]);
1368 let expected_offsets = &[0, 3, 3];
1369 let expected_values: ArrayRef =
1370 Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
1371 assert_eq!(array.value_offsets(), expected_offsets);
1372 assert_eq!(array.values(), &expected_values);
1373 }
1374
1375 #[test]
1376 fn test_list_from_iter_str() {
1377 let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
1378 Some(vec![Some("foo"), None, Some("bar")]),
1379 None,
1380 ]);
1381 let expected_offsets = &[0, 3, 3];
1382 let expected_values: ArrayRef =
1383 Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
1384 assert_eq!(array.value_offsets(), expected_offsets);
1385 assert_eq!(array.values(), &expected_values);
1386 }
1387
1388 #[test]
1389 fn test_list_from_iter_dict_str() {
1390 let array =
1391 ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(vec![
1392 Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
1393 None,
1394 ]);
1395 let expected_offsets = &[0, 4, 4];
1396 let expected_dict_values: ArrayRef =
1397 Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
1398 let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]);
1399 let expected_values: ArrayRef = Arc::new(
1400 Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(),
1401 );
1402 assert_eq!(array.value_offsets(), expected_offsets);
1403 assert_eq!(array.values(), &expected_values);
1404 }
1405}