1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait:
41 ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
42{
43 const IS_LARGE: bool;
45 const PREFIX: &'static str;
47 const MAX_OFFSET: usize;
49}
50
51impl OffsetSizeTrait for i32 {
52 const IS_LARGE: bool = false;
53 const PREFIX: &'static str = "";
54 const MAX_OFFSET: usize = i32::MAX as usize;
55}
56
57impl OffsetSizeTrait for i64 {
58 const IS_LARGE: bool = true;
59 const PREFIX: &'static str = "Large";
60 const MAX_OFFSET: usize = i64::MAX as usize;
61}
62
63pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
172 data_type: DataType,
173 nulls: Option<NullBuffer>,
174 values: ArrayRef,
175 value_offsets: OffsetBuffer<OffsetSize>,
176}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 let iter = iter.into_iter();
423 let size_hint = iter.size_hint().0;
424 let mut builder =
425 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
426
427 for i in iter {
428 match i {
429 Some(p) => {
430 for t in p {
431 builder.values().append_option(t);
432 }
433 builder.append(true);
434 }
435 None => builder.append(false),
436 }
437 }
438 builder.finish()
439 }
440}
441
442impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
443 fn from(data: ArrayData) -> Self {
444 Self::try_new_from_array_data(data)
445 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
446 }
447}
448
449impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
450 fn from(array: GenericListArray<OffsetSize>) -> Self {
451 let len = array.len();
452 let builder = ArrayDataBuilder::new(array.data_type)
453 .len(len)
454 .nulls(array.nulls)
455 .buffers(vec![array.value_offsets.into_inner().into_inner()])
456 .child_data(vec![array.values.to_data()]);
457
458 unsafe { builder.build_unchecked() }
459 }
460}
461
462impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
463 fn from(value: FixedSizeListArray) -> Self {
464 let (field, size) = match value.data_type() {
465 DataType::FixedSizeList(f, size) => (f, *size as usize),
466 _ => unreachable!(),
467 };
468
469 let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, value.len()));
470
471 Self {
472 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
473 nulls: value.nulls().cloned(),
474 values: value.values().clone(),
475 value_offsets: offsets,
476 }
477 }
478}
479
480impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482 if data.buffers().len() != 1 {
483 return Err(ArrowError::InvalidArgumentError(format!(
484 "ListArray data should contain a single buffer only (value offsets), had {}",
485 data.buffers().len()
486 )));
487 }
488
489 if data.child_data().len() != 1 {
490 return Err(ArrowError::InvalidArgumentError(format!(
491 "ListArray should contain a single child array (values array), had {}",
492 data.child_data().len()
493 )));
494 }
495
496 let values = data.child_data()[0].clone();
497
498 if let Some(child_data_type) = Self::get_type(data.data_type()) {
499 if values.data_type() != child_data_type {
500 return Err(ArrowError::InvalidArgumentError(format!(
501 "[Large]ListArray's child datatype {:?} does not \
502 correspond to the List's datatype {:?}",
503 values.data_type(),
504 child_data_type
505 )));
506 }
507 } else {
508 return Err(ArrowError::InvalidArgumentError(format!(
509 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510 data.data_type()
511 )));
512 }
513
514 let values = make_array(values);
515 let value_offsets = unsafe { get_offsets(&data) };
518
519 Ok(Self {
520 data_type: data.data_type().clone(),
521 nulls: data.nulls().cloned(),
522 values,
523 value_offsets,
524 })
525 }
526}
527
528impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
529 fn as_any(&self) -> &dyn Any {
530 self
531 }
532
533 fn to_data(&self) -> ArrayData {
534 self.clone().into()
535 }
536
537 fn into_data(self) -> ArrayData {
538 self.into()
539 }
540
541 fn data_type(&self) -> &DataType {
542 &self.data_type
543 }
544
545 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
546 Arc::new(self.slice(offset, length))
547 }
548
549 fn len(&self) -> usize {
550 self.value_offsets.len() - 1
551 }
552
553 fn is_empty(&self) -> bool {
554 self.value_offsets.len() <= 1
555 }
556
557 fn shrink_to_fit(&mut self) {
558 if let Some(nulls) = &mut self.nulls {
559 nulls.shrink_to_fit();
560 }
561 self.values.shrink_to_fit();
562 self.value_offsets.shrink_to_fit();
563 }
564
565 fn offset(&self) -> usize {
566 0
567 }
568
569 fn nulls(&self) -> Option<&NullBuffer> {
570 self.nulls.as_ref()
571 }
572
573 fn logical_null_count(&self) -> usize {
574 self.null_count()
576 }
577
578 fn get_buffer_memory_size(&self) -> usize {
579 let mut size = self.values.get_buffer_memory_size();
580 size += self.value_offsets.inner().inner().capacity();
581 if let Some(n) = self.nulls.as_ref() {
582 size += n.buffer().capacity();
583 }
584 size
585 }
586
587 fn get_array_memory_size(&self) -> usize {
588 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
589 size += self.value_offsets.inner().inner().capacity();
590 if let Some(n) = self.nulls.as_ref() {
591 size += n.buffer().capacity();
592 }
593 size
594 }
595}
596
597impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
598 type Item = ArrayRef;
599
600 fn value(&self, index: usize) -> Self::Item {
601 GenericListArray::value(self, index)
602 }
603
604 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
605 GenericListArray::value(self, index)
606 }
607}
608
609impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
610 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
611 let prefix = OffsetSize::PREFIX;
612
613 write!(f, "{prefix}ListArray\n[\n")?;
614 print_long_array(self, f, |array, index, f| {
615 std::fmt::Debug::fmt(&array.value(index), f)
616 })?;
617 write!(f, "]")
618 }
619}
620
/// A [`GenericListArray`] whose offsets are `i32`.
pub type ListArray = GenericListArray<i32>;
625
/// A [`GenericListArray`] whose offsets are `i64`.
pub type LargeListArray = GenericListArray<i64>;
630
631#[cfg(test)]
632mod tests {
633 use super::*;
634 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
635 use crate::cast::AsArray;
636 use crate::types::Int32Type;
637 use crate::{Int32Array, Int64Array};
638 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
639 use arrow_schema::Field;
640
641 fn create_from_buffers() -> ListArray {
642 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
644 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
645 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
646 ListArray::new(field, offsets, Arc::new(values), None)
647 }
648
649 #[test]
650 fn test_from_iter_primitive() {
651 let data = vec![
652 Some(vec![Some(0), Some(1), Some(2)]),
653 Some(vec![Some(3), Some(4), Some(5)]),
654 Some(vec![Some(6), Some(7)]),
655 ];
656 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
657
658 let another = create_from_buffers();
659 assert_eq!(list_array, another)
660 }
661
662 #[test]
663 fn test_empty_list_array() {
664 let value_data = ArrayData::builder(DataType::Int32)
666 .len(0)
667 .add_buffer(Buffer::from([]))
668 .build()
669 .unwrap();
670
671 let value_offsets = Buffer::from([]);
673
674 let list_data_type =
676 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
677 let list_data = ArrayData::builder(list_data_type)
678 .len(0)
679 .add_buffer(value_offsets)
680 .add_child_data(value_data)
681 .build()
682 .unwrap();
683
684 let list_array = ListArray::from(list_data);
685 assert_eq!(list_array.len(), 0)
686 }
687
688 #[test]
689 fn test_list_array() {
690 let value_data = ArrayData::builder(DataType::Int32)
692 .len(8)
693 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
694 .build()
695 .unwrap();
696
697 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
700
701 let list_data_type =
703 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
704 let list_data = ArrayData::builder(list_data_type.clone())
705 .len(3)
706 .add_buffer(value_offsets.clone())
707 .add_child_data(value_data.clone())
708 .build()
709 .unwrap();
710 let list_array = ListArray::from(list_data);
711
712 let values = list_array.values();
713 assert_eq!(value_data, values.to_data());
714 assert_eq!(DataType::Int32, list_array.value_type());
715 assert_eq!(3, list_array.len());
716 assert_eq!(0, list_array.null_count());
717 assert_eq!(6, list_array.value_offsets()[2]);
718 assert_eq!(2, list_array.value_length(2));
719 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
720 assert_eq!(
721 0,
722 unsafe { list_array.value_unchecked(0) }
723 .as_primitive::<Int32Type>()
724 .value(0)
725 );
726 for i in 0..3 {
727 assert!(list_array.is_valid(i));
728 assert!(!list_array.is_null(i));
729 }
730
731 let list_data = ArrayData::builder(list_data_type)
734 .len(2)
735 .offset(1)
736 .add_buffer(value_offsets)
737 .add_child_data(value_data.clone())
738 .build()
739 .unwrap();
740 let list_array = ListArray::from(list_data);
741
742 let values = list_array.values();
743 assert_eq!(value_data, values.to_data());
744 assert_eq!(DataType::Int32, list_array.value_type());
745 assert_eq!(2, list_array.len());
746 assert_eq!(0, list_array.null_count());
747 assert_eq!(6, list_array.value_offsets()[1]);
748 assert_eq!(2, list_array.value_length(1));
749 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
750 assert_eq!(
751 3,
752 unsafe { list_array.value_unchecked(0) }
753 .as_primitive::<Int32Type>()
754 .value(0)
755 );
756 }
757
758 #[test]
759 fn test_large_list_array() {
760 let value_data = ArrayData::builder(DataType::Int32)
762 .len(8)
763 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
764 .build()
765 .unwrap();
766
767 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
770
771 let list_data_type = DataType::new_large_list(DataType::Int32, false);
773 let list_data = ArrayData::builder(list_data_type.clone())
774 .len(3)
775 .add_buffer(value_offsets.clone())
776 .add_child_data(value_data.clone())
777 .build()
778 .unwrap();
779 let list_array = LargeListArray::from(list_data);
780
781 let values = list_array.values();
782 assert_eq!(value_data, values.to_data());
783 assert_eq!(DataType::Int32, list_array.value_type());
784 assert_eq!(3, list_array.len());
785 assert_eq!(0, list_array.null_count());
786 assert_eq!(6, list_array.value_offsets()[2]);
787 assert_eq!(2, list_array.value_length(2));
788 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
789 assert_eq!(
790 0,
791 unsafe { list_array.value_unchecked(0) }
792 .as_primitive::<Int32Type>()
793 .value(0)
794 );
795 for i in 0..3 {
796 assert!(list_array.is_valid(i));
797 assert!(!list_array.is_null(i));
798 }
799
800 let list_data = ArrayData::builder(list_data_type)
803 .len(2)
804 .offset(1)
805 .add_buffer(value_offsets)
806 .add_child_data(value_data.clone())
807 .build()
808 .unwrap();
809 let list_array = LargeListArray::from(list_data);
810
811 let values = list_array.values();
812 assert_eq!(value_data, values.to_data());
813 assert_eq!(DataType::Int32, list_array.value_type());
814 assert_eq!(2, list_array.len());
815 assert_eq!(0, list_array.null_count());
816 assert_eq!(6, list_array.value_offsets()[1]);
817 assert_eq!(2, list_array.value_length(1));
818 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
819 assert_eq!(
820 3,
821 unsafe { list_array.value_unchecked(0) }
822 .as_primitive::<Int32Type>()
823 .value(0)
824 );
825 }
826
827 #[test]
828 fn test_list_array_slice() {
829 let value_data = ArrayData::builder(DataType::Int32)
831 .len(10)
832 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
833 .build()
834 .unwrap();
835
836 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
839 let mut null_bits: [u8; 2] = [0; 2];
841 bit_util::set_bit(&mut null_bits, 0);
842 bit_util::set_bit(&mut null_bits, 3);
843 bit_util::set_bit(&mut null_bits, 4);
844 bit_util::set_bit(&mut null_bits, 6);
845 bit_util::set_bit(&mut null_bits, 8);
846
847 let list_data_type =
849 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
850 let list_data = ArrayData::builder(list_data_type)
851 .len(9)
852 .add_buffer(value_offsets)
853 .add_child_data(value_data.clone())
854 .null_bit_buffer(Some(Buffer::from(null_bits)))
855 .build()
856 .unwrap();
857 let list_array = ListArray::from(list_data);
858
859 let values = list_array.values();
860 assert_eq!(value_data, values.to_data());
861 assert_eq!(DataType::Int32, list_array.value_type());
862 assert_eq!(9, list_array.len());
863 assert_eq!(4, list_array.null_count());
864 assert_eq!(2, list_array.value_offsets()[3]);
865 assert_eq!(2, list_array.value_length(3));
866
867 let sliced_array = list_array.slice(1, 6);
868 assert_eq!(6, sliced_array.len());
869 assert_eq!(3, sliced_array.null_count());
870
871 for i in 0..sliced_array.len() {
872 if bit_util::get_bit(&null_bits, 1 + i) {
873 assert!(sliced_array.is_valid(i));
874 } else {
875 assert!(sliced_array.is_null(i));
876 }
877 }
878
879 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
881 assert_eq!(2, sliced_list_array.value_offsets()[2]);
882 assert_eq!(2, sliced_list_array.value_length(2));
883 assert_eq!(4, sliced_list_array.value_offsets()[3]);
884 assert_eq!(2, sliced_list_array.value_length(3));
885 assert_eq!(6, sliced_list_array.value_offsets()[5]);
886 assert_eq!(3, sliced_list_array.value_length(5));
887 }
888
889 #[test]
890 fn test_large_list_array_slice() {
891 let value_data = ArrayData::builder(DataType::Int32)
893 .len(10)
894 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
895 .build()
896 .unwrap();
897
898 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
901 let mut null_bits: [u8; 2] = [0; 2];
903 bit_util::set_bit(&mut null_bits, 0);
904 bit_util::set_bit(&mut null_bits, 3);
905 bit_util::set_bit(&mut null_bits, 4);
906 bit_util::set_bit(&mut null_bits, 6);
907 bit_util::set_bit(&mut null_bits, 8);
908
909 let list_data_type = DataType::new_large_list(DataType::Int32, false);
911 let list_data = ArrayData::builder(list_data_type)
912 .len(9)
913 .add_buffer(value_offsets)
914 .add_child_data(value_data.clone())
915 .null_bit_buffer(Some(Buffer::from(null_bits)))
916 .build()
917 .unwrap();
918 let list_array = LargeListArray::from(list_data);
919
920 let values = list_array.values();
921 assert_eq!(value_data, values.to_data());
922 assert_eq!(DataType::Int32, list_array.value_type());
923 assert_eq!(9, list_array.len());
924 assert_eq!(4, list_array.null_count());
925 assert_eq!(2, list_array.value_offsets()[3]);
926 assert_eq!(2, list_array.value_length(3));
927
928 let sliced_array = list_array.slice(1, 6);
929 assert_eq!(6, sliced_array.len());
930 assert_eq!(3, sliced_array.null_count());
931
932 for i in 0..sliced_array.len() {
933 if bit_util::get_bit(&null_bits, 1 + i) {
934 assert!(sliced_array.is_valid(i));
935 } else {
936 assert!(sliced_array.is_null(i));
937 }
938 }
939
940 let sliced_list_array = sliced_array
942 .as_any()
943 .downcast_ref::<LargeListArray>()
944 .unwrap();
945 assert_eq!(2, sliced_list_array.value_offsets()[2]);
946 assert_eq!(2, sliced_list_array.value_length(2));
947 assert_eq!(4, sliced_list_array.value_offsets()[3]);
948 assert_eq!(2, sliced_list_array.value_length(3));
949 assert_eq!(6, sliced_list_array.value_offsets()[5]);
950 assert_eq!(3, sliced_list_array.value_length(5));
951 }
952
953 #[test]
954 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
955 fn test_list_array_index_out_of_bound() {
956 let value_data = ArrayData::builder(DataType::Int32)
958 .len(10)
959 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
960 .build()
961 .unwrap();
962
963 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
966 let mut null_bits: [u8; 2] = [0; 2];
968 bit_util::set_bit(&mut null_bits, 0);
969 bit_util::set_bit(&mut null_bits, 3);
970 bit_util::set_bit(&mut null_bits, 4);
971 bit_util::set_bit(&mut null_bits, 6);
972 bit_util::set_bit(&mut null_bits, 8);
973
974 let list_data_type = DataType::new_large_list(DataType::Int32, false);
976 let list_data = ArrayData::builder(list_data_type)
977 .len(9)
978 .add_buffer(value_offsets)
979 .add_child_data(value_data)
980 .null_bit_buffer(Some(Buffer::from(null_bits)))
981 .build()
982 .unwrap();
983 let list_array = LargeListArray::from(list_data);
984 assert_eq!(9, list_array.len());
985
986 list_array.value(10);
987 }
988 #[test]
989 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
990 #[cfg(not(feature = "force_validate"))]
993 fn test_list_array_invalid_buffer_len() {
994 let value_data = unsafe {
995 ArrayData::builder(DataType::Int32)
996 .len(8)
997 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
998 .build_unchecked()
999 };
1000 let list_data_type =
1001 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1002 let list_data = unsafe {
1003 ArrayData::builder(list_data_type)
1004 .len(3)
1005 .add_child_data(value_data)
1006 .build_unchecked()
1007 };
1008 drop(ListArray::from(list_data));
1009 }
1010
1011 #[test]
1012 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1013 #[cfg(not(feature = "force_validate"))]
1016 fn test_list_array_invalid_child_array_len() {
1017 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1018 let list_data_type =
1019 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1020 let list_data = unsafe {
1021 ArrayData::builder(list_data_type)
1022 .len(3)
1023 .add_buffer(value_offsets)
1024 .build_unchecked()
1025 };
1026 drop(ListArray::from(list_data));
1027 }
1028
1029 #[test]
1030 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1031 fn test_from_array_data_validation() {
1032 let mut builder = ListBuilder::new(Int32Builder::new());
1033 builder.values().append_value(1);
1034 builder.append(true);
1035 let array = builder.finish();
1036 let _ = LargeListArray::from(array.into_data());
1037 }
1038
1039 #[test]
1040 fn test_list_array_offsets_need_not_start_at_zero() {
1041 let value_data = ArrayData::builder(DataType::Int32)
1042 .len(8)
1043 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1044 .build()
1045 .unwrap();
1046
1047 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1048
1049 let list_data_type =
1050 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1051 let list_data = ArrayData::builder(list_data_type)
1052 .len(3)
1053 .add_buffer(value_offsets)
1054 .add_child_data(value_data)
1055 .build()
1056 .unwrap();
1057
1058 let list_array = ListArray::from(list_data);
1059 assert_eq!(list_array.value_length(0), 0);
1060 assert_eq!(list_array.value_length(1), 3);
1061 assert_eq!(list_array.value_length(2), 2);
1062 }
1063
1064 #[test]
1065 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1066 #[cfg(not(feature = "force_validate"))]
1069 fn test_primitive_array_alignment() {
1070 let buf = Buffer::from_slice_ref([0_u64]);
1071 let buf2 = buf.slice(1);
1072 let array_data = unsafe {
1073 ArrayData::builder(DataType::Int32)
1074 .add_buffer(buf2)
1075 .build_unchecked()
1076 };
1077 drop(Int32Array::from(array_data));
1078 }
1079
1080 #[test]
1081 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1082 #[cfg(not(feature = "force_validate"))]
1085 fn test_list_array_alignment() {
1086 let buf = Buffer::from_slice_ref([0_u64]);
1087 let buf2 = buf.slice(1);
1088
1089 let values: [i32; 8] = [0; 8];
1090 let value_data = unsafe {
1091 ArrayData::builder(DataType::Int32)
1092 .add_buffer(Buffer::from_slice_ref(values))
1093 .build_unchecked()
1094 };
1095
1096 let list_data_type =
1097 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1098 let list_data = unsafe {
1099 ArrayData::builder(list_data_type)
1100 .add_buffer(buf2)
1101 .add_child_data(value_data)
1102 .build_unchecked()
1103 };
1104 drop(ListArray::from(list_data));
1105 }
1106
1107 #[test]
1108 fn list_array_equality() {
1109 fn do_comparison(
1111 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1112 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1113 should_equal: bool,
1114 ) {
1115 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1116 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1117 assert_eq!(lhs == rhs, should_equal);
1118
1119 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1120 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1121 assert_eq!(lhs == rhs, should_equal);
1122 }
1123
1124 do_comparison(
1125 vec![
1126 Some(vec![Some(0), Some(1), Some(2)]),
1127 None,
1128 Some(vec![Some(3), None, Some(5)]),
1129 Some(vec![Some(6), Some(7)]),
1130 ],
1131 vec![
1132 Some(vec![Some(0), Some(1), Some(2)]),
1133 None,
1134 Some(vec![Some(3), None, Some(5)]),
1135 Some(vec![Some(6), Some(7)]),
1136 ],
1137 true,
1138 );
1139
1140 do_comparison(
1141 vec![
1142 None,
1143 None,
1144 Some(vec![Some(3), None, Some(5)]),
1145 Some(vec![Some(6), Some(7)]),
1146 ],
1147 vec![
1148 Some(vec![Some(0), Some(1), Some(2)]),
1149 None,
1150 Some(vec![Some(3), None, Some(5)]),
1151 Some(vec![Some(6), Some(7)]),
1152 ],
1153 false,
1154 );
1155
1156 do_comparison(
1157 vec![
1158 None,
1159 None,
1160 Some(vec![Some(3), None, Some(5)]),
1161 Some(vec![Some(6), Some(7)]),
1162 ],
1163 vec![
1164 None,
1165 None,
1166 Some(vec![Some(3), None, Some(5)]),
1167 Some(vec![Some(0), Some(0)]),
1168 ],
1169 false,
1170 );
1171
1172 do_comparison(
1173 vec![None, None, Some(vec![Some(1)])],
1174 vec![None, None, Some(vec![Some(2)])],
1175 false,
1176 );
1177 }
1178
1179 #[test]
1180 fn test_empty_offsets() {
1181 let f = Arc::new(Field::new("element", DataType::Int32, true));
1182 let string = ListArray::from(
1183 ArrayData::builder(DataType::List(f.clone()))
1184 .buffers(vec![Buffer::from(&[])])
1185 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1186 .build()
1187 .unwrap(),
1188 );
1189 assert_eq!(string.value_offsets(), &[0]);
1190 let string = LargeListArray::from(
1191 ArrayData::builder(DataType::LargeList(f))
1192 .buffers(vec![Buffer::from(&[])])
1193 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1194 .build()
1195 .unwrap(),
1196 );
1197 assert_eq!(string.len(), 0);
1198 assert_eq!(string.value_offsets(), &[0]);
1199 }
1200
1201 #[test]
1202 fn test_try_new() {
1203 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1204 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1205 let values = Arc::new(values) as ArrayRef;
1206
1207 let field = Arc::new(Field::new("element", DataType::Int32, false));
1208 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1209
1210 let nulls = NullBuffer::new_null(3);
1211 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1212
1213 let nulls = NullBuffer::new_null(3);
1214 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1215 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1216 .unwrap_err();
1217
1218 assert_eq!(
1219 err.to_string(),
1220 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1221 );
1222
1223 let field = Arc::new(Field::new("element", DataType::Int64, false));
1224 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1225 .unwrap_err();
1226
1227 assert_eq!(
1228 err.to_string(),
1229 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1230 );
1231
1232 let nulls = NullBuffer::new_null(7);
1233 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1234 let values = Arc::new(values);
1235
1236 let err =
1237 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1238
1239 assert_eq!(
1240 err.to_string(),
1241 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1242 );
1243
1244 let field = Arc::new(Field::new("element", DataType::Int64, true));
1245 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1246
1247 let values = Int64Array::new(vec![0; 2].into(), None);
1248 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1249
1250 assert_eq!(
1251 err.to_string(),
1252 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1253 );
1254 }
1255
1256 #[test]
1257 fn test_from_fixed_size_list() {
1258 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1259 builder.values().append_slice(&[1, 2, 3]);
1260 builder.append(true);
1261 builder.values().append_slice(&[0, 0, 0]);
1262 builder.append(false);
1263 builder.values().append_slice(&[4, 5, 6]);
1264 builder.append(true);
1265 let list: ListArray = builder.finish().into();
1266
1267 let values: Vec<_> = list
1268 .iter()
1269 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1270 .collect();
1271 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1272 }
1273
1274 #[test]
1275 fn test_nullable_union() {
1276 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1277 let mut builder = UnionBuilder::new_dense();
1278 builder.append::<Int32Type>("a", 1).unwrap();
1279 builder.append::<Int32Type>("b", 2).unwrap();
1280 builder.append::<Int32Type>("b", 3).unwrap();
1281 builder.append::<Int32Type>("a", 4).unwrap();
1282 builder.append::<Int32Type>("a", 5).unwrap();
1283 let values = builder.build().unwrap();
1284 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1285 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1286 }
1287}