1use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
19use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait:
41 ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
42{
43 const IS_LARGE: bool;
45 const PREFIX: &'static str;
47 const MAX_OFFSET: usize;
49}
50
51impl OffsetSizeTrait for i32 {
52 const IS_LARGE: bool = false;
53 const PREFIX: &'static str = "";
54 const MAX_OFFSET: usize = i32::MAX as usize;
55}
56
57impl OffsetSizeTrait for i64 {
58 const IS_LARGE: bool = true;
59 const PREFIX: &'static str = "Large";
60 const MAX_OFFSET: usize = i64::MAX as usize;
61}
62
63pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
172 data_type: DataType,
173 nulls: Option<NullBuffer>,
174 values: ArrayRef,
175 value_offsets: OffsetBuffer<OffsetSize>,
176}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
423 }
424
425 pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
445 where
446 B: ArrayBuilder + Default + Extend<Option<T>>,
447 P: IntoIterator<Item = Option<T>>,
448 I: IntoIterator<Item = Option<P>>,
449 {
450 let iter = iter.into_iter();
451 let size_hint = iter.size_hint().0;
452 let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint);
453
454 for i in iter {
455 match i {
456 Some(p) => {
457 builder.values().extend(p);
458 builder.append(true);
459 }
460 None => builder.append(false),
461 }
462 }
463 builder.finish()
464 }
465}
466
467impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
468 fn from(data: ArrayData) -> Self {
469 Self::try_new_from_array_data(data)
470 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
471 }
472}
473
474impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
475 fn from(array: GenericListArray<OffsetSize>) -> Self {
476 let len = array.len();
477 let builder = ArrayDataBuilder::new(array.data_type)
478 .len(len)
479 .nulls(array.nulls)
480 .buffers(vec![array.value_offsets.into_inner().into_inner()])
481 .child_data(vec![array.values.to_data()]);
482
483 unsafe { builder.build_unchecked() }
484 }
485}
486
487impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
488 fn from(value: FixedSizeListArray) -> Self {
489 let (field, size) = match value.data_type() {
490 DataType::FixedSizeList(f, size) => (f, *size as usize),
491 _ => unreachable!(),
492 };
493
494 let offsets = OffsetBuffer::from_repeated_length(size, value.len());
495
496 Self {
497 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
498 nulls: value.nulls().cloned(),
499 values: value.values().clone(),
500 value_offsets: offsets,
501 }
502 }
503}
504
505impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
506 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
507 let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
508
509 if buffers.len() != 1 {
510 return Err(ArrowError::InvalidArgumentError(format!(
511 "ListArray data should contain a single buffer only (value offsets), had {}",
512 buffers.len()
513 )));
514 }
515 let buffer = buffers.pop().expect("checked above");
516
517 if child_data.len() != 1 {
518 return Err(ArrowError::InvalidArgumentError(format!(
519 "ListArray should contain a single child array (values array), had {}",
520 child_data.len()
521 )));
522 }
523
524 let values = child_data.pop().expect("checked above");
525
526 if let Some(child_data_type) = Self::get_type(&data_type) {
527 if values.data_type() != child_data_type {
528 return Err(ArrowError::InvalidArgumentError(format!(
529 "[Large]ListArray's child datatype {:?} does not \
530 correspond to the List's datatype {:?}",
531 values.data_type(),
532 child_data_type
533 )));
534 }
535 } else {
536 return Err(ArrowError::InvalidArgumentError(format!(
537 "[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
538 )));
539 }
540
541 let values = make_array(values);
542 let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
545
546 Ok(Self {
547 data_type,
548 nulls,
549 values,
550 value_offsets,
551 })
552 }
553}
554
555unsafe impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
557 fn as_any(&self) -> &dyn Any {
558 self
559 }
560
561 fn to_data(&self) -> ArrayData {
562 self.clone().into()
563 }
564
565 fn into_data(self) -> ArrayData {
566 self.into()
567 }
568
569 fn data_type(&self) -> &DataType {
570 &self.data_type
571 }
572
573 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
574 Arc::new(self.slice(offset, length))
575 }
576
577 fn len(&self) -> usize {
578 self.value_offsets.len() - 1
579 }
580
581 fn is_empty(&self) -> bool {
582 self.value_offsets.len() <= 1
583 }
584
585 fn shrink_to_fit(&mut self) {
586 if let Some(nulls) = &mut self.nulls {
587 nulls.shrink_to_fit();
588 }
589 self.values.shrink_to_fit();
590 self.value_offsets.shrink_to_fit();
591 }
592
593 fn offset(&self) -> usize {
594 0
595 }
596
597 fn nulls(&self) -> Option<&NullBuffer> {
598 self.nulls.as_ref()
599 }
600
601 fn logical_null_count(&self) -> usize {
602 self.null_count()
604 }
605
606 fn get_buffer_memory_size(&self) -> usize {
607 let mut size = self.values.get_buffer_memory_size();
608 size += self.value_offsets.inner().inner().capacity();
609 if let Some(n) = self.nulls.as_ref() {
610 size += n.buffer().capacity();
611 }
612 size
613 }
614
615 fn get_array_memory_size(&self) -> usize {
616 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
617 size += self.value_offsets.inner().inner().capacity();
618 if let Some(n) = self.nulls.as_ref() {
619 size += n.buffer().capacity();
620 }
621 size
622 }
623}
624
625impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
626 type Item = ArrayRef;
627
628 fn value(&self, index: usize) -> Self::Item {
629 GenericListArray::value(self, index)
630 }
631
632 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
633 GenericListArray::value(self, index)
634 }
635}
636
637impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
638 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
639 let prefix = OffsetSize::PREFIX;
640
641 write!(f, "{prefix}ListArray\n[\n")?;
642 print_long_array(self, f, |array, index, f| {
643 std::fmt::Debug::fmt(&array.value(index), f)
644 })?;
645 write!(f, "]")
646 }
647}
648
649pub type ListArray = GenericListArray<i32>;
653
654pub type LargeListArray = GenericListArray<i64>;
658
659#[cfg(test)]
660mod tests {
661 use super::*;
662 use crate::builder::{
663 BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder,
664 StringDictionaryBuilder, UnionBuilder,
665 };
666 use crate::cast::AsArray;
667 use crate::types::{Int8Type, Int32Type};
668 use crate::{
669 BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray,
670 };
671 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
672 use arrow_schema::Field;
673
674 fn create_from_buffers() -> ListArray {
675 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
677 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
678 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
679 ListArray::new(field, offsets, Arc::new(values), None)
680 }
681
/// `from_iter_primitive` must produce the same array as building it from buffers.
#[test]
fn test_from_iter_primitive() {
    let rows = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let from_iter = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);
    let from_buffers = create_from_buffers();

    assert_eq!(from_iter, from_buffers)
}
694
/// A list array built from empty value and offset buffers has length zero.
#[test]
fn test_empty_list_array() {
    // Empty child values.
    let child = ArrayData::builder(DataType::Int32)
        .len(0)
        .add_buffer(Buffer::from([]))
        .build()
        .unwrap();

    // Empty offsets buffer.
    let offsets = Buffer::from([]);

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let data = ArrayData::builder(DataType::List(item_field))
        .len(0)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();

    let array = ListArray::from(data);
    assert_eq!(array.len(), 0)
}
720
/// Exercises the accessors of a `ListArray` built from `ArrayData`, first
/// without and then with an `ArrayData` offset of 1.
#[test]
fn test_list_array() {
    // Child values 0..=7.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // Offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));

    let full = ListArray::from(
        ArrayData::builder(list_type.clone())
            .len(3)
            .add_buffer(offsets.clone())
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));
    assert_eq!(0, full.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    for idx in 0..3 {
        assert!(full.is_valid(idx));
        assert!(!full.is_null(idx));
    }

    // Same buffers, but starting one list later: [[3, 4, 5], [6, 7]].
    let shifted = ListArray::from(
        ArrayData::builder(list_type)
            .len(2)
            .offset(1)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, shifted.values().to_data());
    assert_eq!(DataType::Int32, shifted.value_type());
    assert_eq!(2, shifted.len());
    assert_eq!(0, shifted.null_count());
    assert_eq!(6, shifted.value_offsets()[1]);
    assert_eq!(2, shifted.value_length(1));
    assert_eq!(3, shifted.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { shifted.value_unchecked(0) };
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
}
790
/// Exercises the accessors of a `LargeListArray` built from `ArrayData`,
/// first without and then with an `ArrayData` offset of 1.
#[test]
fn test_large_list_array() {
    // Child values 0..=7.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // 64-bit offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);

    let list_type = DataType::new_large_list(DataType::Int32, false);

    let full = LargeListArray::from(
        ArrayData::builder(list_type.clone())
            .len(3)
            .add_buffer(offsets.clone())
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));
    assert_eq!(0, full.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    for idx in 0..3 {
        assert!(full.is_valid(idx));
        assert!(!full.is_null(idx));
    }

    // Same buffers, but starting one list later: [[3, 4, 5], [6, 7]].
    let shifted = LargeListArray::from(
        ArrayData::builder(list_type)
            .len(2)
            .offset(1)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, shifted.values().to_data());
    assert_eq!(DataType::Int32, shifted.value_type());
    assert_eq!(2, shifted.len());
    assert_eq!(0, shifted.null_count());
    assert_eq!(6, shifted.value_offsets()[1]);
    assert_eq!(2, shifted.value_length(1));
    assert_eq!(3, shifted.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { shifted.value_unchecked(0) };
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
}
859
/// Slicing a nullable `ListArray` keeps validity and offsets aligned with the
/// selected window.
#[test]
fn test_list_array_slice() {
    // Child values 0..=9.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Nine lists, several of them empty.
    let offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap: only lists 0, 3, 4, 6 and 8 are valid.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child.clone())
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = ListArray::from(data);

    assert_eq!(child, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let window_start = 1;
    let sliced_array = list_array.slice(window_start, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Every slot of the slice must mirror the bitmap shifted by the start.
    for idx in 0..sliced_array.len() {
        let expect_valid = bit_util::get_bit(&null_bits, window_start + idx);
        if expect_valid {
            assert!(sliced_array.is_valid(idx));
        } else {
            assert!(sliced_array.is_null(idx));
        }
    }

    // Offsets are still expressed relative to the unsliced child.
    let sliced = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(2, sliced.value_offsets()[2]);
    assert_eq!(2, sliced.value_length(2));
    assert_eq!(4, sliced.value_offsets()[3]);
    assert_eq!(2, sliced.value_length(3));
    assert_eq!(6, sliced.value_offsets()[5]);
    assert_eq!(3, sliced.value_length(5));
}
921
/// Slicing a nullable `LargeListArray` keeps validity and offsets aligned
/// with the selected window.
#[test]
fn test_large_list_array_slice() {
    // Child values 0..=9.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Nine lists with 64-bit offsets, several of them empty.
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap: only lists 0, 3, 4, 6 and 8 are valid.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child.clone())
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = LargeListArray::from(data);

    assert_eq!(child, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let window_start = 1;
    let sliced_array = list_array.slice(window_start, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Every slot of the slice must mirror the bitmap shifted by the start.
    for idx in 0..sliced_array.len() {
        let expect_valid = bit_util::get_bit(&null_bits, window_start + idx);
        if expect_valid {
            assert!(sliced_array.is_valid(idx));
        } else {
            assert!(sliced_array.is_null(idx));
        }
    }

    // Offsets are still expressed relative to the unsliced child.
    let sliced = sliced_array
        .as_any()
        .downcast_ref::<LargeListArray>()
        .unwrap();
    assert_eq!(2, sliced.value_offsets()[2]);
    assert_eq!(2, sliced.value_length(2));
    assert_eq!(4, sliced.value_offsets()[3]);
    assert_eq!(2, sliced.value_length(3));
    assert_eq!(6, sliced.value_offsets()[5]);
    assert_eq!(3, sliced.value_length(5));
}
985
/// `value` must panic with an index-out-of-bounds message when asked for a
/// list past the end of the array.
#[test]
#[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
fn test_list_array_index_out_of_bound() {
    // Child values 0..=9.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    // Ten offsets, i.e. nine lists.
    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Validity bitmap: only lists 0, 3, 4, 6 and 8 are valid.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let data = ArrayData::builder(list_type)
        .len(9)
        .add_buffer(offsets)
        .add_child_data(child)
        .null_bit_buffer(Some(Buffer::from(null_bits)))
        .build()
        .unwrap();
    let list_array = LargeListArray::from(data);
    assert_eq!(9, list_array.len());

    // Index 10 needs offsets[11], which does not exist.
    list_array.value(10);
}
/// Converting `ArrayData` that carries no offsets buffer must panic.
#[test]
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
    let child_values = Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]);
    let child = unsafe {
        ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(child_values)
            .build_unchecked()
    };

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    // Deliberately built without any buffer.
    let data = unsafe {
        ArrayData::builder(list_type)
            .len(3)
            .add_child_data(child)
            .build_unchecked()
    };

    drop(ListArray::from(data));
}
1043
/// Converting `ArrayData` that carries no child array must panic.
#[test]
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
    let offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));

    // Deliberately built without any child data.
    let data = unsafe {
        ArrayData::builder(list_type)
            .len(3)
            .add_buffer(offsets)
            .build_unchecked()
    };

    drop(ListArray::from(data));
}
1061
/// Feeding `List` data into `LargeListArray::from` must panic on the data type.
#[test]
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
fn test_from_array_data_validation() {
    let mut list_builder = ListBuilder::new(Int32Builder::new());
    list_builder.values().append_value(1);
    list_builder.append(true);

    let small_list_data = list_builder.finish().into_data();
    let _ = LargeListArray::from(small_list_data);
}
1071
/// The first offset may be non-zero; list lengths are still offset differences.
#[test]
fn test_list_array_offsets_need_not_start_at_zero() {
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    let offsets = Buffer::from_slice_ref([2, 2, 5, 7]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let data = ArrayData::builder(list_type)
        .len(3)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();

    let list_array = ListArray::from(data);
    for (idx, expected_len) in [(0, 0), (1, 3), (2, 2)] {
        assert_eq!(list_array.value_length(idx), expected_len);
    }
}
1096
/// A primitive array over a buffer sliced at byte offset 1 must be rejected.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // Skipping a single byte breaks the alignment required for `i32`.
    let misaligned = aligned.slice(1);

    let data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(misaligned)
            .build_unchecked()
    };

    drop(Int32Array::from(data));
}
1112
/// A list array whose offsets buffer is sliced at byte offset 1 must be rejected.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // Skipping a single byte breaks the alignment required for the offsets.
    let misaligned = aligned.slice(1);

    let zeros: [i32; 8] = [0; 8];
    let child = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(Buffer::from_slice_ref(zeros))
            .build_unchecked()
    };

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let data = unsafe {
        ArrayData::builder(list_type)
            .add_buffer(misaligned)
            .add_child_data(child)
            .build_unchecked()
    };

    drop(ListArray::from(data));
}
1139
/// Equality of list arrays, checked for both `ListArray` and `LargeListArray`.
#[test]
fn list_array_equality() {
    /// Builds both sides with each offset width and compares them.
    fn do_comparison(
        lhs_data: Vec<Option<Vec<Option<i32>>>>,
        rhs_data: Vec<Option<Vec<Option<i32>>>>,
        should_equal: bool,
    ) {
        let small_lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
        let small_rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
        assert_eq!(small_lhs == small_rhs, should_equal);

        let large_lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
        let large_rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
        assert_eq!(large_lhs == large_rhs, should_equal);
    }

    let populated = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let leading_nulls = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let different_tail = vec![
        None,
        None,
        Some(vec![Some(3), None, Some(5)]),
        Some(vec![Some(0), Some(0)]),
    ];

    // Identical content compares equal.
    do_comparison(populated.clone(), populated.clone(), true);

    // A null list versus a populated list in the first slot.
    do_comparison(leading_nulls.clone(), populated, false);

    // Same validity, different values in the last list.
    do_comparison(leading_nulls, different_tail, false);

    // Single differing value after leading nulls.
    do_comparison(
        vec![None, None, Some(vec![Some(1)])],
        vec![None, None, Some(vec![Some(2)])],
        false,
    );
}
1211
/// An empty offsets buffer is read back as the single offset `[0]`.
#[test]
fn test_empty_offsets() {
    let element = Arc::new(Field::new("element", DataType::Int32, true));

    let small_data = ArrayData::builder(DataType::List(element.clone()))
        .buffers(vec![Buffer::from(&[])])
        .add_child_data(ArrayData::new_empty(&DataType::Int32))
        .build()
        .unwrap();
    let small = ListArray::from(small_data);
    assert_eq!(small.value_offsets(), &[0]);

    let large_data = ArrayData::builder(DataType::LargeList(element))
        .buffers(vec![Buffer::from(&[])])
        .add_child_data(ArrayData::new_empty(&DataType::Int32))
        .build()
        .unwrap();
    let large = LargeListArray::from(large_data);
    assert_eq!(large.len(), 0);
    assert_eq!(large.value_offsets(), &[0]);
}
1233
/// Walks through the successful paths and each validation error of `try_new`.
#[test]
fn test_try_new() {
    let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
    let values = Arc::new(Int32Array::new(vec![1, 2, 3, 4, 5].into(), None)) as ArrayRef;
    let field = Arc::new(Field::new("element", DataType::Int32, false));

    // Valid without and with a null buffer of matching length.
    ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
    ListArray::new(
        field.clone(),
        offsets,
        values.clone(),
        Some(NullBuffer::new_null(3)),
    );

    // Four lists but only three validity entries.
    let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
    let short_nulls = NullBuffer::new_null(3);
    let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(short_nulls))
        .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
    );

    // Field declares Int64 while the values are Int32.
    let field = Arc::new(Field::new("element", DataType::Int64, false));
    let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
        .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
    );

    // Non-nullable field with values that carry a null buffer.
    let value_nulls = NullBuffer::new_null(7);
    let values = Arc::new(Int64Array::new(vec![0; 7].into(), Some(value_nulls))) as ArrayRef;
    let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
    );

    // The same values are accepted once the field is nullable.
    let field = Arc::new(Field::new("element", DataType::Int64, true));
    LargeListArray::new(field.clone(), offsets.clone(), values, None);

    // Last offset (5) points past the two available values.
    let too_few = Arc::new(Int64Array::new(vec![0; 2].into(), None));
    let err = LargeListArray::try_new(field, offsets, too_few, None).unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Max offset of 5 exceeds length of values 2"
    );
}
1288
/// Converting a `FixedSizeListArray` keeps values and nulls of every list.
#[test]
fn test_from_fixed_size_list() {
    let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    for (row, is_valid) in [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)] {
        builder.values().append_slice(&row);
        builder.append(is_valid);
    }
    let list: ListArray = builder.finish().into();

    let collected: Vec<_> = list
        .iter()
        .map(|entry| entry.map(|inner| inner.as_primitive::<Int32Type>().values().to_vec()))
        .collect();
    assert_eq!(
        collected,
        vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]
    )
}
1306
/// A dense union child is accepted under a non-nullable field.
#[test]
fn test_nullable_union() {
    let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());

    let mut union_builder = UnionBuilder::new_dense();
    for (type_name, v) in [("a", 1), ("b", 2), ("b", 3), ("a", 4), ("a", 5)] {
        union_builder.append::<Int32Type>(type_name, v).unwrap();
    }
    let values = union_builder.build().unwrap();

    let field = Arc::new(Field::new("element", values.data_type().clone(), false));
    ListArray::new(field.clone(), offsets, Arc::new(values), None);
}
1320
/// `new_null` must report the requested length.
#[test]
fn test_list_new_null_len() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let all_null = ListArray::new_null(item_field, 5);
    assert_eq!(all_null.len(), 5);
}
1327
/// `from_nested_iter` with an `Int32Builder` child and a leading null list.
#[test]
fn test_list_from_iter_i32() {
    let rows = vec![None, Some(vec![Some(1), None, Some(2)])];
    let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(rows);

    let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
    assert_eq!(array.value_offsets(), &[0, 0, 3]);
    assert_eq!(array.values(), &expected_values);
}
1339
/// `from_nested_iter` with a `BooleanBuilder` child and a trailing null list.
#[test]
fn test_list_from_iter_bool() {
    let rows = vec![Some(vec![None, Some(false), Some(true)]), None];
    let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(rows);

    let expected_values: ArrayRef =
        Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
    assert_eq!(array.value_offsets(), &[0, 3, 3]);
    assert_eq!(array.values(), &expected_values);
}
1351 }
1352
1353 #[test]
1354 fn test_list_from_iter_str() {
1355 let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
1356 Some(vec![Some("foo"), None, Some("bar")]),
1357 None,
1358 ]);
1359 let expected_offsets = &[0, 3, 3];
1360 let expected_values: ArrayRef =
1361 Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
1362 assert_eq!(array.value_offsets(), expected_offsets);
1363 assert_eq!(array.values(), &expected_values);
1364 }
1365
/// `from_nested_iter` with a dictionary-encoded string child; repeated
/// strings must share a dictionary key.
#[test]
fn test_list_from_iter_dict_str() {
    let rows = vec![
        Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
        None,
    ];
    let array = ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(rows);

    let dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]);
    let dict_values: ArrayRef = Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
    let expected_values: ArrayRef =
        Arc::new(Int8DictionaryArray::try_new(dict_keys, dict_values).unwrap());

    assert_eq!(array.value_offsets(), &[0, 4, 4]);
    assert_eq!(array.values(), &expected_values);
}
1383}