1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22 ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41 const IS_LARGE: bool;
43 const PREFIX: &'static str;
45 const MAX_OFFSET: usize;
47}
48
49impl OffsetSizeTrait for i32 {
50 const IS_LARGE: bool = false;
51 const PREFIX: &'static str = "";
52 const MAX_OFFSET: usize = i32::MAX as usize;
53}
54
55impl OffsetSizeTrait for i64 {
56 const IS_LARGE: bool = true;
57 const PREFIX: &'static str = "Large";
58 const MAX_OFFSET: usize = i64::MAX as usize;
59}
60
61pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
170 data_type: DataType,
171 nulls: Option<NullBuffer>,
172 values: ArrayRef,
173 value_offsets: OffsetBuffer<OffsetSize>,
174}
175
176impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
177 fn clone(&self) -> Self {
178 Self {
179 data_type: self.data_type.clone(),
180 nulls: self.nulls.clone(),
181 values: self.values.clone(),
182 value_offsets: self.value_offsets.clone(),
183 }
184 }
185}
186
187impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
188 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
192 DataType::LargeList
193 } else {
194 DataType::List
195 };
196
197 pub fn try_new(
208 field: FieldRef,
209 offsets: OffsetBuffer<OffsetSize>,
210 values: ArrayRef,
211 nulls: Option<NullBuffer>,
212 ) -> Result<Self, ArrowError> {
213 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
215 if end_offset > values.len() {
218 return Err(ArrowError::InvalidArgumentError(format!(
219 "Max offset of {end_offset} exceeds length of values {}",
220 values.len()
221 )));
222 }
223
224 if let Some(n) = nulls.as_ref() {
225 if n.len() != len {
226 return Err(ArrowError::InvalidArgumentError(format!(
227 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
228 OffsetSize::PREFIX,
229 n.len(),
230 )));
231 }
232 }
233 if !field.is_nullable() && values.is_nullable() {
234 return Err(ArrowError::InvalidArgumentError(format!(
235 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
236 OffsetSize::PREFIX,
237 field.name()
238 )));
239 }
240
241 if field.data_type() != values.data_type() {
242 return Err(ArrowError::InvalidArgumentError(format!(
243 "{}ListArray expected data type {} got {} for {:?}",
244 OffsetSize::PREFIX,
245 field.data_type(),
246 values.data_type(),
247 field.name()
248 )));
249 }
250
251 Ok(Self {
252 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
253 nulls,
254 values,
255 value_offsets: offsets,
256 })
257 }
258
259 pub fn new(
265 field: FieldRef,
266 offsets: OffsetBuffer<OffsetSize>,
267 values: ArrayRef,
268 nulls: Option<NullBuffer>,
269 ) -> Self {
270 Self::try_new(field, offsets, values, nulls).unwrap()
271 }
272
273 pub fn new_null(field: FieldRef, len: usize) -> Self {
275 let values = new_empty_array(field.data_type());
276 Self {
277 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
278 nulls: Some(NullBuffer::new_null(len)),
279 value_offsets: OffsetBuffer::new_zeroed(len),
280 values,
281 }
282 }
283
284 pub fn into_parts(
286 self,
287 ) -> (
288 FieldRef,
289 OffsetBuffer<OffsetSize>,
290 ArrayRef,
291 Option<NullBuffer>,
292 ) {
293 let f = match self.data_type {
294 DataType::List(f) | DataType::LargeList(f) => f,
295 _ => unreachable!(),
296 };
297 (f, self.value_offsets, self.values, self.nulls)
298 }
299
300 #[inline]
309 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
310 &self.value_offsets
311 }
312
313 #[inline]
320 pub fn values(&self) -> &ArrayRef {
321 &self.values
322 }
323
324 pub fn value_type(&self) -> DataType {
326 self.values.data_type().clone()
327 }
328
329 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
333 let end = self.value_offsets().get_unchecked(i + 1).as_usize();
334 let start = self.value_offsets().get_unchecked(i).as_usize();
335 self.values.slice(start, end - start)
336 }
337
338 pub fn value(&self, i: usize) -> ArrayRef {
340 let end = self.value_offsets()[i + 1].as_usize();
341 let start = self.value_offsets()[i].as_usize();
342 self.values.slice(start, end - start)
343 }
344
345 #[inline]
349 pub fn value_offsets(&self) -> &[OffsetSize] {
350 &self.value_offsets
351 }
352
353 #[inline]
355 pub fn value_length(&self, i: usize) -> OffsetSize {
356 let offsets = self.value_offsets();
357 offsets[i + 1] - offsets[i]
358 }
359
360 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
362 GenericListArrayIter::<'a, OffsetSize>::new(self)
363 }
364
365 #[inline]
366 fn get_type(data_type: &DataType) -> Option<&DataType> {
367 match (OffsetSize::IS_LARGE, data_type) {
368 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
369 Some(child.data_type())
370 }
371 _ => None,
372 }
373 }
374
375 pub fn slice(&self, offset: usize, length: usize) -> Self {
381 Self {
382 data_type: self.data_type.clone(),
383 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
384 values: self.values.clone(),
385 value_offsets: self.value_offsets.slice(offset, length),
386 }
387 }
388
389 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
405 where
406 T: ArrowPrimitiveType,
407 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
408 I: IntoIterator<Item = Option<P>>,
409 {
410 let iter = iter.into_iter();
411 let size_hint = iter.size_hint().0;
412 let mut builder =
413 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
414
415 for i in iter {
416 match i {
417 Some(p) => {
418 for t in p {
419 builder.values().append_option(t);
420 }
421 builder.append(true);
422 }
423 None => builder.append(false),
424 }
425 }
426 builder.finish()
427 }
428}
429
430impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
431 fn from(data: ArrayData) -> Self {
432 Self::try_new_from_array_data(data)
433 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
434 }
435}
436
437impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
438 fn from(array: GenericListArray<OffsetSize>) -> Self {
439 let len = array.len();
440 let builder = ArrayDataBuilder::new(array.data_type)
441 .len(len)
442 .nulls(array.nulls)
443 .buffers(vec![array.value_offsets.into_inner().into_inner()])
444 .child_data(vec![array.values.to_data()]);
445
446 unsafe { builder.build_unchecked() }
447 }
448}
449
450impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
451 fn from(value: FixedSizeListArray) -> Self {
452 let (field, size) = match value.data_type() {
453 DataType::FixedSizeList(f, size) => (f, *size as usize),
454 _ => unreachable!(),
455 };
456
457 let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
458
459 Self {
460 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
461 nulls: value.nulls().cloned(),
462 values: value.values().clone(),
463 value_offsets: offsets,
464 }
465 }
466}
467
468impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
469 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
470 if data.buffers().len() != 1 {
471 return Err(ArrowError::InvalidArgumentError(format!(
472 "ListArray data should contain a single buffer only (value offsets), had {}",
473 data.buffers().len()
474 )));
475 }
476
477 if data.child_data().len() != 1 {
478 return Err(ArrowError::InvalidArgumentError(format!(
479 "ListArray should contain a single child array (values array), had {}",
480 data.child_data().len()
481 )));
482 }
483
484 let values = data.child_data()[0].clone();
485
486 if let Some(child_data_type) = Self::get_type(data.data_type()) {
487 if values.data_type() != child_data_type {
488 return Err(ArrowError::InvalidArgumentError(format!(
489 "[Large]ListArray's child datatype {:?} does not \
490 correspond to the List's datatype {:?}",
491 values.data_type(),
492 child_data_type
493 )));
494 }
495 } else {
496 return Err(ArrowError::InvalidArgumentError(format!(
497 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
498 data.data_type()
499 )));
500 }
501
502 let values = make_array(values);
503 let value_offsets = unsafe { get_offsets(&data) };
506
507 Ok(Self {
508 data_type: data.data_type().clone(),
509 nulls: data.nulls().cloned(),
510 values,
511 value_offsets,
512 })
513 }
514}
515
516impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
517 fn as_any(&self) -> &dyn Any {
518 self
519 }
520
521 fn to_data(&self) -> ArrayData {
522 self.clone().into()
523 }
524
525 fn into_data(self) -> ArrayData {
526 self.into()
527 }
528
529 fn data_type(&self) -> &DataType {
530 &self.data_type
531 }
532
533 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
534 Arc::new(self.slice(offset, length))
535 }
536
537 fn len(&self) -> usize {
538 self.value_offsets.len() - 1
539 }
540
541 fn is_empty(&self) -> bool {
542 self.value_offsets.len() <= 1
543 }
544
545 fn shrink_to_fit(&mut self) {
546 if let Some(nulls) = &mut self.nulls {
547 nulls.shrink_to_fit();
548 }
549 self.values.shrink_to_fit();
550 self.value_offsets.shrink_to_fit();
551 }
552
553 fn offset(&self) -> usize {
554 0
555 }
556
557 fn nulls(&self) -> Option<&NullBuffer> {
558 self.nulls.as_ref()
559 }
560
561 fn logical_null_count(&self) -> usize {
562 self.null_count()
564 }
565
566 fn get_buffer_memory_size(&self) -> usize {
567 let mut size = self.values.get_buffer_memory_size();
568 size += self.value_offsets.inner().inner().capacity();
569 if let Some(n) = self.nulls.as_ref() {
570 size += n.buffer().capacity();
571 }
572 size
573 }
574
575 fn get_array_memory_size(&self) -> usize {
576 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
577 size += self.value_offsets.inner().inner().capacity();
578 if let Some(n) = self.nulls.as_ref() {
579 size += n.buffer().capacity();
580 }
581 size
582 }
583}
584
585impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
586 type Item = ArrayRef;
587
588 fn value(&self, index: usize) -> Self::Item {
589 GenericListArray::value(self, index)
590 }
591
592 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
593 GenericListArray::value(self, index)
594 }
595}
596
597impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
598 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
599 let prefix = OffsetSize::PREFIX;
600
601 write!(f, "{prefix}ListArray\n[\n")?;
602 print_long_array(self, f, |array, index, f| {
603 std::fmt::Debug::fmt(&array.value(index), f)
604 })?;
605 write!(f, "]")
606 }
607}
608
/// A [`GenericListArray`] with `i32` offsets.
pub type ListArray = GenericListArray<i32>;
613
/// A [`GenericListArray`] with `i64` offsets.
pub type LargeListArray = GenericListArray<i64>;
618
619#[cfg(test)]
620mod tests {
621 use super::*;
622 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
623 use crate::cast::AsArray;
624 use crate::types::Int32Type;
625 use crate::{Int32Array, Int64Array};
626 use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
627 use arrow_schema::Field;
628
629 fn create_from_buffers() -> ListArray {
630 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
632 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
633 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
634 ListArray::new(field, offsets, Arc::new(values), None)
635 }
636
/// `from_iter_primitive` must produce the same array as building from buffers.
#[test]
fn test_from_iter_primitive() {
    let rows = vec![
        Some(vec![Some(0), Some(1), Some(2)]),
        Some(vec![Some(3), Some(4), Some(5)]),
        Some(vec![Some(6), Some(7)]),
    ];
    let from_iter = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);
    let from_buffers = create_from_buffers();

    assert_eq!(from_iter, from_buffers)
}
649
/// A list array built from empty buffers has length zero.
#[test]
fn test_empty_list_array() {
    // Empty child array.
    let child = ArrayData::builder(DataType::Int32)
        .len(0)
        .add_buffer(Buffer::from([]))
        .build()
        .unwrap();

    // Empty offsets buffer.
    let offsets = Buffer::from([]);

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let data = ArrayData::builder(DataType::List(item_field))
        .len(0)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();

    let list_array = ListArray::from(data);
    assert_eq!(list_array.len(), 0)
}
675
/// Exercises the accessors of a `ListArray` built from `ArrayData`, with and
/// without a non-zero `ArrayData` offset.
#[test]
fn test_list_array() {
    // Child values 0..8.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // Offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0, 3, 6, 8]);

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));

    let full = ListArray::from(
        ArrayData::builder(list_type.clone())
            .len(3)
            .add_buffer(offsets.clone())
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));
    assert_eq!(0, full.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    for i in 0..3 {
        assert!(full.is_valid(i));
        assert!(!full.is_null(i));
    }

    // Same buffers, but skipping the first list via the ArrayData offset:
    // [[3, 4, 5], [6, 7]].
    let skipped = ListArray::from(
        ArrayData::builder(list_type)
            .len(2)
            .offset(1)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, skipped.values().to_data());
    assert_eq!(DataType::Int32, skipped.value_type());
    assert_eq!(2, skipped.len());
    assert_eq!(0, skipped.null_count());
    assert_eq!(6, skipped.value_offsets()[1]);
    assert_eq!(2, skipped.value_length(1));
    assert_eq!(3, skipped.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { skipped.value_unchecked(0) };
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
}
745
/// Exercises the accessors of a `LargeListArray` built from `ArrayData`,
/// with and without a non-zero `ArrayData` offset.
#[test]
fn test_large_list_array() {
    // Child values 0..8.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // 64-bit offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);

    let list_type = DataType::new_large_list(DataType::Int32, false);

    let full = LargeListArray::from(
        ArrayData::builder(list_type.clone())
            .len(3)
            .add_buffer(offsets.clone())
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, full.values().to_data());
    assert_eq!(DataType::Int32, full.value_type());
    assert_eq!(3, full.len());
    assert_eq!(0, full.null_count());
    assert_eq!(6, full.value_offsets()[2]);
    assert_eq!(2, full.value_length(2));
    assert_eq!(0, full.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { full.value_unchecked(0) };
    assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
    for i in 0..3 {
        assert!(full.is_valid(i));
        assert!(!full.is_null(i));
    }

    // Same buffers, but skipping the first list via the ArrayData offset:
    // [[3, 4, 5], [6, 7]].
    let skipped = LargeListArray::from(
        ArrayData::builder(list_type)
            .len(2)
            .offset(1)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .build()
            .unwrap(),
    );

    assert_eq!(child, skipped.values().to_data());
    assert_eq!(DataType::Int32, skipped.value_type());
    assert_eq!(2, skipped.len());
    assert_eq!(0, skipped.null_count());
    assert_eq!(6, skipped.value_offsets()[1]);
    assert_eq!(2, skipped.value_length(1));
    assert_eq!(3, skipped.value(0).as_primitive::<Int32Type>().value(0));
    let first = unsafe { skipped.value_unchecked(0) };
    assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
}
814
/// Slicing a `ListArray` keeps validity, offsets and lengths consistent.
#[test]
fn test_list_array_slice() {
    // Child values 0..10.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    let offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Entries 0, 3, 4, 6 and 8 are valid; the remaining four are null.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let list_array = ListArray::from(
        ArrayData::builder(list_type)
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );

    assert_eq!(child, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let sliced_array = list_array.slice(1, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Entry `i` of the slice corresponds to entry `1 + i` of the source.
    for i in 0..sliced_array.len() {
        let expect_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expect_valid {
            assert!(sliced_array.is_valid(i));
        } else {
            assert!(sliced_array.is_null(i));
        }
    }

    // Check offset and length of selected entries of the slice.
    let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
    for (index, offset, length) in [(2, 2, 2), (3, 4, 2), (5, 6, 3)] {
        assert_eq!(offset, sliced_list_array.value_offsets()[index]);
        assert_eq!(length, sliced_list_array.value_length(index));
    }
}
876
/// Slicing a `LargeListArray` keeps validity, offsets and lengths consistent.
#[test]
fn test_large_list_array_slice() {
    // Child values 0..10.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Entries 0, 3, 4, 6 and 8 are valid; the remaining four are null.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let list_array = LargeListArray::from(
        ArrayData::builder(list_type)
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );

    assert_eq!(child, list_array.values().to_data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offsets()[3]);
    assert_eq!(2, list_array.value_length(3));

    let sliced_array = list_array.slice(1, 6);
    assert_eq!(6, sliced_array.len());
    assert_eq!(3, sliced_array.null_count());

    // Entry `i` of the slice corresponds to entry `1 + i` of the source.
    for i in 0..sliced_array.len() {
        let expect_valid = bit_util::get_bit(&null_bits, 1 + i);
        if expect_valid {
            assert!(sliced_array.is_valid(i));
        } else {
            assert!(sliced_array.is_null(i));
        }
    }

    // Check offset and length of selected entries of the slice.
    let sliced_list_array = sliced_array
        .as_any()
        .downcast_ref::<LargeListArray>()
        .unwrap();
    for (index, offset, length) in [(2, 2, 2), (3, 4, 2), (5, 6, 3)] {
        assert_eq!(offset, sliced_list_array.value_offsets()[index]);
        assert_eq!(length, sliced_list_array.value_length(index));
    }
}
940
/// `value` panics when asked for an entry past the end of the array.
#[test]
#[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
fn test_list_array_index_out_of_bound() {
    // Child values 0..10.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);

    // Entries 0, 3, 4, 6 and 8 are valid.
    let mut null_bits: [u8; 2] = [0; 2];
    for valid_index in [0, 3, 4, 6, 8] {
        bit_util::set_bit(&mut null_bits, valid_index);
    }

    let list_type = DataType::new_large_list(DataType::Int32, false);
    let list_array = LargeListArray::from(
        ArrayData::builder(list_type)
            .len(9)
            .add_buffer(offsets)
            .add_child_data(child)
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap(),
    );
    assert_eq!(9, list_array.len());

    // Ten offsets exist, so reading offset 11 for entry 10 must panic.
    list_array.value(10);
}
/// Converting `ArrayData` without an offsets buffer must panic.
#[test]
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_buffer_len() {
    let child = unsafe {
        ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build_unchecked()
    };
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));

    // Deliberately no offsets buffer.
    let data = unsafe {
        ArrayData::builder(DataType::List(item_field))
            .len(3)
            .add_child_data(child)
            .build_unchecked()
    };
    drop(ListArray::from(data));
}
998
/// Converting `ArrayData` without a child array must panic.
#[test]
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
    let offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));

    // Deliberately no child data.
    let data = unsafe {
        ArrayData::builder(DataType::List(item_field))
            .len(3)
            .add_buffer(offsets)
            .build_unchecked()
    };
    drop(ListArray::from(data));
}
1016
/// Converting `List` data into a `LargeListArray` must panic.
#[test]
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
fn test_from_array_data_validation() {
    let mut builder = ListBuilder::new(Int32Builder::new());
    builder.values().append_value(1);
    builder.append(true);

    let list_data = builder.finish().into_data();
    let _ = LargeListArray::from(list_data);
}
1026
/// Offsets may begin at a non-zero position in the child array.
#[test]
fn test_list_array_offsets_need_not_start_at_zero() {
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    // The first offset is 2 rather than 0.
    let offsets = Buffer::from_slice_ref([2, 2, 5, 7]);

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let data = ArrayData::builder(DataType::List(item_field))
        .len(3)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();

    let list_array = ListArray::from(data);
    let lengths: Vec<i32> = (0..3).map(|i| list_array.value_length(i)).collect();
    assert_eq!(lengths, vec![0, 3, 2]);
}
1051
/// Building an `Int32Array` over a buffer sliced at byte 1 must panic.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // Skipping a single byte leaves the buffer start misaligned for `i32`.
    let misaligned = aligned.slice(1);

    let data = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(misaligned)
            .build_unchecked()
    };
    drop(Int32Array::from(data));
}
1067
/// Building a `ListArray` over an offsets buffer sliced at byte 1 must panic.
#[test]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
    let aligned = Buffer::from_slice_ref([0_u64]);
    // Skipping a single byte leaves the offsets misaligned for `i32`.
    let misaligned_offsets = aligned.slice(1);

    let zeros: [i32; 8] = [0; 8];
    let child = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(Buffer::from_slice_ref(zeros))
            .build_unchecked()
    };

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let data = unsafe {
        ArrayData::builder(DataType::List(item_field))
            .add_buffer(misaligned_offsets)
            .add_child_data(child)
            .build_unchecked()
    };
    drop(ListArray::from(data));
}
1094
/// Equality of list arrays, checked for both `ListArray` and `LargeListArray`.
#[test]
fn list_array_equality() {
    type Rows = Vec<Option<Vec<Option<i32>>>>;

    /// Builds both sides as `ListArray` and as `LargeListArray` and checks
    /// that `==` yields `should_equal` in each case.
    fn do_comparison(lhs_data: Rows, rhs_data: Rows, should_equal: bool) {
        let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
        let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
        assert_eq!(lhs == rhs, should_equal);

        let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
        let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
        assert_eq!(lhs == rhs, should_equal);
    }

    let reference = || -> Rows {
        vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            None,
            Some(vec![Some(3), None, Some(5)]),
            Some(vec![Some(6), Some(7)]),
        ]
    };
    let leading_nulls = || -> Rows {
        vec![
            None,
            None,
            Some(vec![Some(3), None, Some(5)]),
            Some(vec![Some(6), Some(7)]),
        ]
    };
    let leading_nulls_other_tail = || -> Rows {
        vec![
            None,
            None,
            Some(vec![Some(3), None, Some(5)]),
            Some(vec![Some(0), Some(0)]),
        ]
    };

    // Identical content compares equal.
    do_comparison(reference(), reference(), true);

    // A null first list differs from a populated first list.
    do_comparison(leading_nulls(), reference(), false);

    // Same validity, different values in the last list.
    do_comparison(leading_nulls(), leading_nulls_other_tail(), false);

    // Same validity, different single value.
    do_comparison(
        vec![None, None, Some(vec![Some(1)])],
        vec![None, None, Some(vec![Some(2)])],
        false,
    );
}
1166
/// An empty offsets buffer yields an empty array whose offsets are `[0]`,
/// for both `ListArray` and `LargeListArray`.
#[test]
fn test_empty_offsets() {
    let f = Arc::new(Field::new("element", DataType::Int32, true));

    let list = ListArray::from(
        ArrayData::builder(DataType::List(f.clone()))
            .buffers(vec![Buffer::from(&[])])
            .add_child_data(ArrayData::new_empty(&DataType::Int32))
            .build()
            .unwrap(),
    );
    // Assert the length here too, matching the large-list case below.
    assert_eq!(list.len(), 0);
    assert_eq!(list.value_offsets(), &[0]);

    let large_list = LargeListArray::from(
        ArrayData::builder(DataType::LargeList(f))
            .buffers(vec![Buffer::from(&[])])
            .add_child_data(ArrayData::new_empty(&DataType::Int32))
            .build()
            .unwrap(),
    );
    assert_eq!(large_list.len(), 0);
    assert_eq!(large_list.value_offsets(), &[0]);
}
1188
/// Covers the accepting paths of `new` and each error path of `try_new`.
#[test]
fn test_try_new() {
    let three_lists = OffsetBuffer::new(vec![0, 1, 4, 5].into());
    let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
    let values = Arc::new(values) as ArrayRef;
    let int32_field = Arc::new(Field::new("element", DataType::Int32, false));

    // Valid: no null buffer.
    ListArray::new(int32_field.clone(), three_lists.clone(), values.clone(), None);

    // Valid: null buffer whose length equals the number of lists.
    let matching_nulls = NullBuffer::new_null(3);
    ListArray::new(
        int32_field.clone(),
        three_lists,
        values.clone(),
        Some(matching_nulls),
    );

    // Invalid: four lists but a null buffer of length three.
    let short_nulls = NullBuffer::new_null(3);
    let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
    let err = LargeListArray::try_new(
        int32_field,
        offsets.clone(),
        values.clone(),
        Some(short_nulls),
    )
    .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
    );

    // Invalid: field declares Int64 but the values are Int32.
    let int64_field = Arc::new(Field::new("element", DataType::Int64, false));
    let err = LargeListArray::try_new(int64_field.clone(), offsets.clone(), values.clone(), None)
        .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
    );

    // Invalid: non-nullable field combined with values that contain nulls.
    let value_nulls = NullBuffer::new_null(7);
    let nullable_values = Arc::new(Int64Array::new(vec![0; 7].into(), Some(value_nulls)));
    let err = LargeListArray::try_new(int64_field, offsets.clone(), nullable_values.clone(), None)
        .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
    );

    // Valid: the same values with a nullable field.
    let nullable_field = Arc::new(Field::new("element", DataType::Int64, true));
    LargeListArray::new(nullable_field.clone(), offsets.clone(), nullable_values, None);

    // Invalid: the last offset (5) exceeds the number of values (2).
    let too_few_values = Int64Array::new(vec![0; 2].into(), None);
    let err = LargeListArray::try_new(nullable_field, offsets, Arc::new(too_few_values), None)
        .unwrap_err();
    assert_eq!(
        err.to_string(),
        "Invalid argument error: Max offset of 5 exceeds length of values 2"
    );
}
1243
/// Converting a `FixedSizeListArray` preserves values and validity.
#[test]
fn test_from_fixed_size_list() {
    let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    // The middle entry is null but still occupies three child slots.
    for (items, is_valid) in [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)] {
        builder.values().append_slice(&items);
        builder.append(is_valid);
    }
    let list: ListArray = builder.finish().into();

    let rows: Vec<_> = list
        .iter()
        .map(|row| row.map(|items| items.as_primitive::<Int32Type>().values().to_vec()))
        .collect();
    assert_eq!(rows, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
}
1261
/// A dense union child can back a list whose field is declared non-nullable;
/// `ListArray::new` panics if construction is rejected.
#[test]
fn test_nullable_union() {
    let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
    let mut builder = UnionBuilder::new_dense();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int32Type>("b", 2).unwrap();
    builder.append::<Int32Type>("b", 3).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    builder.append::<Int32Type>("a", 5).unwrap();
    let values = builder.build().unwrap();
    let field = Arc::new(Field::new("element", values.data_type().clone(), false));
    // `field` is not used afterwards, so it is moved rather than cloned.
    ListArray::new(field, offsets, Arc::new(values), None);
}
1275}