1use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait:
41 ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
42{
43 const IS_LARGE: bool;
45 const PREFIX: &'static str;
47 const MAX_OFFSET: usize;
49}
50
51impl OffsetSizeTrait for i32 {
52 const IS_LARGE: bool = false;
53 const PREFIX: &'static str = "";
54 const MAX_OFFSET: usize = i32::MAX as usize;
55}
56
57impl OffsetSizeTrait for i64 {
58 const IS_LARGE: bool = true;
59 const PREFIX: &'static str = "Large";
60 const MAX_OFFSET: usize = i64::MAX as usize;
61}
62
63pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
172 data_type: DataType,
173 nulls: Option<NullBuffer>,
174 values: ArrayRef,
175 value_offsets: OffsetBuffer<OffsetSize>,
176}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 let iter = iter.into_iter();
423 let size_hint = iter.size_hint().0;
424 let mut builder =
425 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
426
427 for i in iter {
428 match i {
429 Some(p) => {
430 for t in p {
431 builder.values().append_option(t);
432 }
433 builder.append(true);
434 }
435 None => builder.append(false),
436 }
437 }
438 builder.finish()
439 }
440}
441
442impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
443 fn from(data: ArrayData) -> Self {
444 Self::try_new_from_array_data(data)
445 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
446 }
447}
448
449impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
450 fn from(array: GenericListArray<OffsetSize>) -> Self {
451 let len = array.len();
452 let builder = ArrayDataBuilder::new(array.data_type)
453 .len(len)
454 .nulls(array.nulls)
455 .buffers(vec![array.value_offsets.into_inner().into_inner()])
456 .child_data(vec![array.values.to_data()]);
457
458 unsafe { builder.build_unchecked() }
459 }
460}
461
462impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
463 fn from(value: FixedSizeListArray) -> Self {
464 let (field, size) = match value.data_type() {
465 DataType::FixedSizeList(f, size) => (f, *size as usize),
466 _ => unreachable!(),
467 };
468
469 let offsets = OffsetBuffer::from_repeated_length(size, value.len());
470
471 Self {
472 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
473 nulls: value.nulls().cloned(),
474 values: value.values().clone(),
475 value_offsets: offsets,
476 }
477 }
478}
479
480impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482 let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
483
484 if buffers.len() != 1 {
485 return Err(ArrowError::InvalidArgumentError(format!(
486 "ListArray data should contain a single buffer only (value offsets), had {}",
487 buffers.len()
488 )));
489 }
490 let buffer = buffers.pop().expect("checked above");
491
492 if child_data.len() != 1 {
493 return Err(ArrowError::InvalidArgumentError(format!(
494 "ListArray should contain a single child array (values array), had {}",
495 child_data.len()
496 )));
497 }
498
499 let values = child_data.pop().expect("checked above");
500
501 if let Some(child_data_type) = Self::get_type(&data_type) {
502 if values.data_type() != child_data_type {
503 return Err(ArrowError::InvalidArgumentError(format!(
504 "[Large]ListArray's child datatype {:?} does not \
505 correspond to the List's datatype {:?}",
506 values.data_type(),
507 child_data_type
508 )));
509 }
510 } else {
511 return Err(ArrowError::InvalidArgumentError(format!(
512 "[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
513 )));
514 }
515
516 let values = make_array(values);
517 let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
520
521 Ok(Self {
522 data_type,
523 nulls,
524 values,
525 value_offsets,
526 })
527 }
528}
529
530unsafe impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
532 fn as_any(&self) -> &dyn Any {
533 self
534 }
535
536 fn to_data(&self) -> ArrayData {
537 self.clone().into()
538 }
539
540 fn into_data(self) -> ArrayData {
541 self.into()
542 }
543
544 fn data_type(&self) -> &DataType {
545 &self.data_type
546 }
547
548 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
549 Arc::new(self.slice(offset, length))
550 }
551
552 fn len(&self) -> usize {
553 self.value_offsets.len() - 1
554 }
555
556 fn is_empty(&self) -> bool {
557 self.value_offsets.len() <= 1
558 }
559
560 fn shrink_to_fit(&mut self) {
561 if let Some(nulls) = &mut self.nulls {
562 nulls.shrink_to_fit();
563 }
564 self.values.shrink_to_fit();
565 self.value_offsets.shrink_to_fit();
566 }
567
568 fn offset(&self) -> usize {
569 0
570 }
571
572 fn nulls(&self) -> Option<&NullBuffer> {
573 self.nulls.as_ref()
574 }
575
576 fn logical_null_count(&self) -> usize {
577 self.null_count()
579 }
580
581 fn get_buffer_memory_size(&self) -> usize {
582 let mut size = self.values.get_buffer_memory_size();
583 size += self.value_offsets.inner().inner().capacity();
584 if let Some(n) = self.nulls.as_ref() {
585 size += n.buffer().capacity();
586 }
587 size
588 }
589
590 fn get_array_memory_size(&self) -> usize {
591 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
592 size += self.value_offsets.inner().inner().capacity();
593 if let Some(n) = self.nulls.as_ref() {
594 size += n.buffer().capacity();
595 }
596 size
597 }
598}
599
600impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
601 type Item = ArrayRef;
602
603 fn value(&self, index: usize) -> Self::Item {
604 GenericListArray::value(self, index)
605 }
606
607 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
608 GenericListArray::value(self, index)
609 }
610}
611
612impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
613 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
614 let prefix = OffsetSize::PREFIX;
615
616 write!(f, "{prefix}ListArray\n[\n")?;
617 print_long_array(self, f, |array, index, f| {
618 std::fmt::Debug::fmt(&array.value(index), f)
619 })?;
620 write!(f, "]")
621 }
622}
623
624pub type ListArray = GenericListArray<i32>;
628
629pub type LargeListArray = GenericListArray<i64>;
633
634#[cfg(test)]
635mod tests {
636 use super::*;
637 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
638 use crate::cast::AsArray;
639 use crate::types::Int32Type;
640 use crate::{Int32Array, Int64Array};
641 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
642 use arrow_schema::Field;
643
644 fn create_from_buffers() -> ListArray {
645 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
647 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
648 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
649 ListArray::new(field, offsets, Arc::new(values), None)
650 }
651
652 #[test]
653 fn test_from_iter_primitive() {
654 let data = vec![
655 Some(vec![Some(0), Some(1), Some(2)]),
656 Some(vec![Some(3), Some(4), Some(5)]),
657 Some(vec![Some(6), Some(7)]),
658 ];
659 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
660
661 let another = create_from_buffers();
662 assert_eq!(list_array, another)
663 }
664
665 #[test]
666 fn test_empty_list_array() {
667 let value_data = ArrayData::builder(DataType::Int32)
669 .len(0)
670 .add_buffer(Buffer::from([]))
671 .build()
672 .unwrap();
673
674 let value_offsets = Buffer::from([]);
676
677 let list_data_type =
679 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
680 let list_data = ArrayData::builder(list_data_type)
681 .len(0)
682 .add_buffer(value_offsets)
683 .add_child_data(value_data)
684 .build()
685 .unwrap();
686
687 let list_array = ListArray::from(list_data);
688 assert_eq!(list_array.len(), 0)
689 }
690
691 #[test]
692 fn test_list_array() {
693 let value_data = ArrayData::builder(DataType::Int32)
695 .len(8)
696 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
697 .build()
698 .unwrap();
699
700 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
703
704 let list_data_type =
706 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
707 let list_data = ArrayData::builder(list_data_type.clone())
708 .len(3)
709 .add_buffer(value_offsets.clone())
710 .add_child_data(value_data.clone())
711 .build()
712 .unwrap();
713 let list_array = ListArray::from(list_data);
714
715 let values = list_array.values();
716 assert_eq!(value_data, values.to_data());
717 assert_eq!(DataType::Int32, list_array.value_type());
718 assert_eq!(3, list_array.len());
719 assert_eq!(0, list_array.null_count());
720 assert_eq!(6, list_array.value_offsets()[2]);
721 assert_eq!(2, list_array.value_length(2));
722 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
723 assert_eq!(
724 0,
725 unsafe { list_array.value_unchecked(0) }
726 .as_primitive::<Int32Type>()
727 .value(0)
728 );
729 for i in 0..3 {
730 assert!(list_array.is_valid(i));
731 assert!(!list_array.is_null(i));
732 }
733
734 let list_data = ArrayData::builder(list_data_type)
737 .len(2)
738 .offset(1)
739 .add_buffer(value_offsets)
740 .add_child_data(value_data.clone())
741 .build()
742 .unwrap();
743 let list_array = ListArray::from(list_data);
744
745 let values = list_array.values();
746 assert_eq!(value_data, values.to_data());
747 assert_eq!(DataType::Int32, list_array.value_type());
748 assert_eq!(2, list_array.len());
749 assert_eq!(0, list_array.null_count());
750 assert_eq!(6, list_array.value_offsets()[1]);
751 assert_eq!(2, list_array.value_length(1));
752 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
753 assert_eq!(
754 3,
755 unsafe { list_array.value_unchecked(0) }
756 .as_primitive::<Int32Type>()
757 .value(0)
758 );
759 }
760
761 #[test]
762 fn test_large_list_array() {
763 let value_data = ArrayData::builder(DataType::Int32)
765 .len(8)
766 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
767 .build()
768 .unwrap();
769
770 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
773
774 let list_data_type = DataType::new_large_list(DataType::Int32, false);
776 let list_data = ArrayData::builder(list_data_type.clone())
777 .len(3)
778 .add_buffer(value_offsets.clone())
779 .add_child_data(value_data.clone())
780 .build()
781 .unwrap();
782 let list_array = LargeListArray::from(list_data);
783
784 let values = list_array.values();
785 assert_eq!(value_data, values.to_data());
786 assert_eq!(DataType::Int32, list_array.value_type());
787 assert_eq!(3, list_array.len());
788 assert_eq!(0, list_array.null_count());
789 assert_eq!(6, list_array.value_offsets()[2]);
790 assert_eq!(2, list_array.value_length(2));
791 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
792 assert_eq!(
793 0,
794 unsafe { list_array.value_unchecked(0) }
795 .as_primitive::<Int32Type>()
796 .value(0)
797 );
798 for i in 0..3 {
799 assert!(list_array.is_valid(i));
800 assert!(!list_array.is_null(i));
801 }
802
803 let list_data = ArrayData::builder(list_data_type)
806 .len(2)
807 .offset(1)
808 .add_buffer(value_offsets)
809 .add_child_data(value_data.clone())
810 .build()
811 .unwrap();
812 let list_array = LargeListArray::from(list_data);
813
814 let values = list_array.values();
815 assert_eq!(value_data, values.to_data());
816 assert_eq!(DataType::Int32, list_array.value_type());
817 assert_eq!(2, list_array.len());
818 assert_eq!(0, list_array.null_count());
819 assert_eq!(6, list_array.value_offsets()[1]);
820 assert_eq!(2, list_array.value_length(1));
821 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
822 assert_eq!(
823 3,
824 unsafe { list_array.value_unchecked(0) }
825 .as_primitive::<Int32Type>()
826 .value(0)
827 );
828 }
829
830 #[test]
831 fn test_list_array_slice() {
832 let value_data = ArrayData::builder(DataType::Int32)
834 .len(10)
835 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
836 .build()
837 .unwrap();
838
839 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
842 let mut null_bits: [u8; 2] = [0; 2];
844 bit_util::set_bit(&mut null_bits, 0);
845 bit_util::set_bit(&mut null_bits, 3);
846 bit_util::set_bit(&mut null_bits, 4);
847 bit_util::set_bit(&mut null_bits, 6);
848 bit_util::set_bit(&mut null_bits, 8);
849
850 let list_data_type =
852 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
853 let list_data = ArrayData::builder(list_data_type)
854 .len(9)
855 .add_buffer(value_offsets)
856 .add_child_data(value_data.clone())
857 .null_bit_buffer(Some(Buffer::from(null_bits)))
858 .build()
859 .unwrap();
860 let list_array = ListArray::from(list_data);
861
862 let values = list_array.values();
863 assert_eq!(value_data, values.to_data());
864 assert_eq!(DataType::Int32, list_array.value_type());
865 assert_eq!(9, list_array.len());
866 assert_eq!(4, list_array.null_count());
867 assert_eq!(2, list_array.value_offsets()[3]);
868 assert_eq!(2, list_array.value_length(3));
869
870 let sliced_array = list_array.slice(1, 6);
871 assert_eq!(6, sliced_array.len());
872 assert_eq!(3, sliced_array.null_count());
873
874 for i in 0..sliced_array.len() {
875 if bit_util::get_bit(&null_bits, 1 + i) {
876 assert!(sliced_array.is_valid(i));
877 } else {
878 assert!(sliced_array.is_null(i));
879 }
880 }
881
882 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
884 assert_eq!(2, sliced_list_array.value_offsets()[2]);
885 assert_eq!(2, sliced_list_array.value_length(2));
886 assert_eq!(4, sliced_list_array.value_offsets()[3]);
887 assert_eq!(2, sliced_list_array.value_length(3));
888 assert_eq!(6, sliced_list_array.value_offsets()[5]);
889 assert_eq!(3, sliced_list_array.value_length(5));
890 }
891
892 #[test]
893 fn test_large_list_array_slice() {
894 let value_data = ArrayData::builder(DataType::Int32)
896 .len(10)
897 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
898 .build()
899 .unwrap();
900
901 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
904 let mut null_bits: [u8; 2] = [0; 2];
906 bit_util::set_bit(&mut null_bits, 0);
907 bit_util::set_bit(&mut null_bits, 3);
908 bit_util::set_bit(&mut null_bits, 4);
909 bit_util::set_bit(&mut null_bits, 6);
910 bit_util::set_bit(&mut null_bits, 8);
911
912 let list_data_type = DataType::new_large_list(DataType::Int32, false);
914 let list_data = ArrayData::builder(list_data_type)
915 .len(9)
916 .add_buffer(value_offsets)
917 .add_child_data(value_data.clone())
918 .null_bit_buffer(Some(Buffer::from(null_bits)))
919 .build()
920 .unwrap();
921 let list_array = LargeListArray::from(list_data);
922
923 let values = list_array.values();
924 assert_eq!(value_data, values.to_data());
925 assert_eq!(DataType::Int32, list_array.value_type());
926 assert_eq!(9, list_array.len());
927 assert_eq!(4, list_array.null_count());
928 assert_eq!(2, list_array.value_offsets()[3]);
929 assert_eq!(2, list_array.value_length(3));
930
931 let sliced_array = list_array.slice(1, 6);
932 assert_eq!(6, sliced_array.len());
933 assert_eq!(3, sliced_array.null_count());
934
935 for i in 0..sliced_array.len() {
936 if bit_util::get_bit(&null_bits, 1 + i) {
937 assert!(sliced_array.is_valid(i));
938 } else {
939 assert!(sliced_array.is_null(i));
940 }
941 }
942
943 let sliced_list_array = sliced_array
945 .as_any()
946 .downcast_ref::<LargeListArray>()
947 .unwrap();
948 assert_eq!(2, sliced_list_array.value_offsets()[2]);
949 assert_eq!(2, sliced_list_array.value_length(2));
950 assert_eq!(4, sliced_list_array.value_offsets()[3]);
951 assert_eq!(2, sliced_list_array.value_length(3));
952 assert_eq!(6, sliced_list_array.value_offsets()[5]);
953 assert_eq!(3, sliced_list_array.value_length(5));
954 }
955
956 #[test]
957 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
958 fn test_list_array_index_out_of_bound() {
959 let value_data = ArrayData::builder(DataType::Int32)
961 .len(10)
962 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
963 .build()
964 .unwrap();
965
966 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
969 let mut null_bits: [u8; 2] = [0; 2];
971 bit_util::set_bit(&mut null_bits, 0);
972 bit_util::set_bit(&mut null_bits, 3);
973 bit_util::set_bit(&mut null_bits, 4);
974 bit_util::set_bit(&mut null_bits, 6);
975 bit_util::set_bit(&mut null_bits, 8);
976
977 let list_data_type = DataType::new_large_list(DataType::Int32, false);
979 let list_data = ArrayData::builder(list_data_type)
980 .len(9)
981 .add_buffer(value_offsets)
982 .add_child_data(value_data)
983 .null_bit_buffer(Some(Buffer::from(null_bits)))
984 .build()
985 .unwrap();
986 let list_array = LargeListArray::from(list_data);
987 assert_eq!(9, list_array.len());
988
989 list_array.value(10);
990 }
991 #[test]
992 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
993 #[cfg(not(feature = "force_validate"))]
996 fn test_list_array_invalid_buffer_len() {
997 let value_data = unsafe {
998 ArrayData::builder(DataType::Int32)
999 .len(8)
1000 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1001 .build_unchecked()
1002 };
1003 let list_data_type =
1004 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1005 let list_data = unsafe {
1006 ArrayData::builder(list_data_type)
1007 .len(3)
1008 .add_child_data(value_data)
1009 .build_unchecked()
1010 };
1011 drop(ListArray::from(list_data));
1012 }
1013
1014 #[test]
1015 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1016 #[cfg(not(feature = "force_validate"))]
1019 fn test_list_array_invalid_child_array_len() {
1020 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1021 let list_data_type =
1022 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1023 let list_data = unsafe {
1024 ArrayData::builder(list_data_type)
1025 .len(3)
1026 .add_buffer(value_offsets)
1027 .build_unchecked()
1028 };
1029 drop(ListArray::from(list_data));
1030 }
1031
1032 #[test]
1033 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1034 fn test_from_array_data_validation() {
1035 let mut builder = ListBuilder::new(Int32Builder::new());
1036 builder.values().append_value(1);
1037 builder.append(true);
1038 let array = builder.finish();
1039 let _ = LargeListArray::from(array.into_data());
1040 }
1041
1042 #[test]
1043 fn test_list_array_offsets_need_not_start_at_zero() {
1044 let value_data = ArrayData::builder(DataType::Int32)
1045 .len(8)
1046 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1047 .build()
1048 .unwrap();
1049
1050 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1051
1052 let list_data_type =
1053 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1054 let list_data = ArrayData::builder(list_data_type)
1055 .len(3)
1056 .add_buffer(value_offsets)
1057 .add_child_data(value_data)
1058 .build()
1059 .unwrap();
1060
1061 let list_array = ListArray::from(list_data);
1062 assert_eq!(list_array.value_length(0), 0);
1063 assert_eq!(list_array.value_length(1), 3);
1064 assert_eq!(list_array.value_length(2), 2);
1065 }
1066
1067 #[test]
1068 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1069 #[cfg(not(feature = "force_validate"))]
1072 fn test_primitive_array_alignment() {
1073 let buf = Buffer::from_slice_ref([0_u64]);
1074 let buf2 = buf.slice(1);
1075 let array_data = unsafe {
1076 ArrayData::builder(DataType::Int32)
1077 .add_buffer(buf2)
1078 .build_unchecked()
1079 };
1080 drop(Int32Array::from(array_data));
1081 }
1082
1083 #[test]
1084 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1085 #[cfg(not(feature = "force_validate"))]
1088 fn test_list_array_alignment() {
1089 let buf = Buffer::from_slice_ref([0_u64]);
1090 let buf2 = buf.slice(1);
1091
1092 let values: [i32; 8] = [0; 8];
1093 let value_data = unsafe {
1094 ArrayData::builder(DataType::Int32)
1095 .add_buffer(Buffer::from_slice_ref(values))
1096 .build_unchecked()
1097 };
1098
1099 let list_data_type =
1100 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1101 let list_data = unsafe {
1102 ArrayData::builder(list_data_type)
1103 .add_buffer(buf2)
1104 .add_child_data(value_data)
1105 .build_unchecked()
1106 };
1107 drop(ListArray::from(list_data));
1108 }
1109
1110 #[test]
1111 fn list_array_equality() {
1112 fn do_comparison(
1114 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1115 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1116 should_equal: bool,
1117 ) {
1118 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1119 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1120 assert_eq!(lhs == rhs, should_equal);
1121
1122 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1123 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1124 assert_eq!(lhs == rhs, should_equal);
1125 }
1126
1127 do_comparison(
1128 vec![
1129 Some(vec![Some(0), Some(1), Some(2)]),
1130 None,
1131 Some(vec![Some(3), None, Some(5)]),
1132 Some(vec![Some(6), Some(7)]),
1133 ],
1134 vec![
1135 Some(vec![Some(0), Some(1), Some(2)]),
1136 None,
1137 Some(vec![Some(3), None, Some(5)]),
1138 Some(vec![Some(6), Some(7)]),
1139 ],
1140 true,
1141 );
1142
1143 do_comparison(
1144 vec![
1145 None,
1146 None,
1147 Some(vec![Some(3), None, Some(5)]),
1148 Some(vec![Some(6), Some(7)]),
1149 ],
1150 vec![
1151 Some(vec![Some(0), Some(1), Some(2)]),
1152 None,
1153 Some(vec![Some(3), None, Some(5)]),
1154 Some(vec![Some(6), Some(7)]),
1155 ],
1156 false,
1157 );
1158
1159 do_comparison(
1160 vec![
1161 None,
1162 None,
1163 Some(vec![Some(3), None, Some(5)]),
1164 Some(vec![Some(6), Some(7)]),
1165 ],
1166 vec![
1167 None,
1168 None,
1169 Some(vec![Some(3), None, Some(5)]),
1170 Some(vec![Some(0), Some(0)]),
1171 ],
1172 false,
1173 );
1174
1175 do_comparison(
1176 vec![None, None, Some(vec![Some(1)])],
1177 vec![None, None, Some(vec![Some(2)])],
1178 false,
1179 );
1180 }
1181
1182 #[test]
1183 fn test_empty_offsets() {
1184 let f = Arc::new(Field::new("element", DataType::Int32, true));
1185 let string = ListArray::from(
1186 ArrayData::builder(DataType::List(f.clone()))
1187 .buffers(vec![Buffer::from(&[])])
1188 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1189 .build()
1190 .unwrap(),
1191 );
1192 assert_eq!(string.value_offsets(), &[0]);
1193 let string = LargeListArray::from(
1194 ArrayData::builder(DataType::LargeList(f))
1195 .buffers(vec![Buffer::from(&[])])
1196 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1197 .build()
1198 .unwrap(),
1199 );
1200 assert_eq!(string.len(), 0);
1201 assert_eq!(string.value_offsets(), &[0]);
1202 }
1203
1204 #[test]
1205 fn test_try_new() {
1206 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1207 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1208 let values = Arc::new(values) as ArrayRef;
1209
1210 let field = Arc::new(Field::new("element", DataType::Int32, false));
1211 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1212
1213 let nulls = NullBuffer::new_null(3);
1214 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1215
1216 let nulls = NullBuffer::new_null(3);
1217 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1218 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1219 .unwrap_err();
1220
1221 assert_eq!(
1222 err.to_string(),
1223 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1224 );
1225
1226 let field = Arc::new(Field::new("element", DataType::Int64, false));
1227 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1228 .unwrap_err();
1229
1230 assert_eq!(
1231 err.to_string(),
1232 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1233 );
1234
1235 let nulls = NullBuffer::new_null(7);
1236 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1237 let values = Arc::new(values);
1238
1239 let err =
1240 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1241
1242 assert_eq!(
1243 err.to_string(),
1244 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1245 );
1246
1247 let field = Arc::new(Field::new("element", DataType::Int64, true));
1248 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1249
1250 let values = Int64Array::new(vec![0; 2].into(), None);
1251 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1252
1253 assert_eq!(
1254 err.to_string(),
1255 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1256 );
1257 }
1258
1259 #[test]
1260 fn test_from_fixed_size_list() {
1261 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1262 builder.values().append_slice(&[1, 2, 3]);
1263 builder.append(true);
1264 builder.values().append_slice(&[0, 0, 0]);
1265 builder.append(false);
1266 builder.values().append_slice(&[4, 5, 6]);
1267 builder.append(true);
1268 let list: ListArray = builder.finish().into();
1269
1270 let values: Vec<_> = list
1271 .iter()
1272 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1273 .collect();
1274 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1275 }
1276
1277 #[test]
1278 fn test_nullable_union() {
1279 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1280 let mut builder = UnionBuilder::new_dense();
1281 builder.append::<Int32Type>("a", 1).unwrap();
1282 builder.append::<Int32Type>("b", 2).unwrap();
1283 builder.append::<Int32Type>("b", 3).unwrap();
1284 builder.append::<Int32Type>("a", 4).unwrap();
1285 builder.append::<Int32Type>("a", 5).unwrap();
1286 let values = builder.build().unwrap();
1287 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1288 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1289 }
1290
1291 #[test]
1292 fn test_list_new_null_len() {
1293 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1294 let array = ListArray::new_null(field, 5);
1295 assert_eq!(array.len(), 5);
1296 }
1297}