1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81pub trait Array: std::fmt::Debug + Send + Sync {
83 fn as_any(&self) -> &dyn Any;
106
107 fn to_data(&self) -> ArrayData;
109
110 fn into_data(self) -> ArrayData;
114
115 fn data_type(&self) -> &DataType;
128
129 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
143
144 fn len(&self) -> usize;
156
157 fn is_empty(&self) -> bool;
169
170 fn shrink_to_fit(&mut self) {}
175
176 fn offset(&self) -> usize;
192
193 fn nulls(&self) -> Option<&NullBuffer>;
206
207 fn logical_nulls(&self) -> Option<NullBuffer> {
226 self.nulls().cloned()
227 }
228
229 fn is_null(&self, index: usize) -> bool {
251 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
252 }
253
254 fn is_valid(&self, index: usize) -> bool {
268 !self.is_null(index)
269 }
270
271 fn null_count(&self) -> usize {
287 self.nulls().map(|n| n.null_count()).unwrap_or_default()
288 }
289
290 fn logical_null_count(&self) -> usize {
307 self.logical_nulls()
308 .map(|n| n.null_count())
309 .unwrap_or_default()
310 }
311
312 fn is_nullable(&self) -> bool {
326 self.logical_null_count() != 0
327 }
328
329 fn get_buffer_memory_size(&self) -> usize;
334
335 fn get_array_memory_size(&self) -> usize;
339}
340
/// A reference-counted, type-erased [`Array`].
pub type ArrayRef = Arc<dyn Array>;
343
344impl Array for ArrayRef {
346 fn as_any(&self) -> &dyn Any {
347 self.as_ref().as_any()
348 }
349
350 fn to_data(&self) -> ArrayData {
351 self.as_ref().to_data()
352 }
353
354 fn into_data(self) -> ArrayData {
355 self.to_data()
356 }
357
358 fn data_type(&self) -> &DataType {
359 self.as_ref().data_type()
360 }
361
362 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
363 self.as_ref().slice(offset, length)
364 }
365
366 fn len(&self) -> usize {
367 self.as_ref().len()
368 }
369
370 fn is_empty(&self) -> bool {
371 self.as_ref().is_empty()
372 }
373
374 fn shrink_to_fit(&mut self) {
376 if let Some(slf) = Arc::get_mut(self) {
377 slf.shrink_to_fit();
378 } else {
379 }
381 }
382
383 fn offset(&self) -> usize {
384 self.as_ref().offset()
385 }
386
387 fn nulls(&self) -> Option<&NullBuffer> {
388 self.as_ref().nulls()
389 }
390
391 fn logical_nulls(&self) -> Option<NullBuffer> {
392 self.as_ref().logical_nulls()
393 }
394
395 fn is_null(&self, index: usize) -> bool {
396 self.as_ref().is_null(index)
397 }
398
399 fn is_valid(&self, index: usize) -> bool {
400 self.as_ref().is_valid(index)
401 }
402
403 fn null_count(&self) -> usize {
404 self.as_ref().null_count()
405 }
406
407 fn logical_null_count(&self) -> usize {
408 self.as_ref().logical_null_count()
409 }
410
411 fn is_nullable(&self) -> bool {
412 self.as_ref().is_nullable()
413 }
414
415 fn get_buffer_memory_size(&self) -> usize {
416 self.as_ref().get_buffer_memory_size()
417 }
418
419 fn get_array_memory_size(&self) -> usize {
420 self.as_ref().get_array_memory_size()
421 }
422}
423
424impl<T: Array> Array for &T {
425 fn as_any(&self) -> &dyn Any {
426 T::as_any(self)
427 }
428
429 fn to_data(&self) -> ArrayData {
430 T::to_data(self)
431 }
432
433 fn into_data(self) -> ArrayData {
434 self.to_data()
435 }
436
437 fn data_type(&self) -> &DataType {
438 T::data_type(self)
439 }
440
441 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
442 T::slice(self, offset, length)
443 }
444
445 fn len(&self) -> usize {
446 T::len(self)
447 }
448
449 fn is_empty(&self) -> bool {
450 T::is_empty(self)
451 }
452
453 fn offset(&self) -> usize {
454 T::offset(self)
455 }
456
457 fn nulls(&self) -> Option<&NullBuffer> {
458 T::nulls(self)
459 }
460
461 fn logical_nulls(&self) -> Option<NullBuffer> {
462 T::logical_nulls(self)
463 }
464
465 fn is_null(&self, index: usize) -> bool {
466 T::is_null(self, index)
467 }
468
469 fn is_valid(&self, index: usize) -> bool {
470 T::is_valid(self, index)
471 }
472
473 fn null_count(&self) -> usize {
474 T::null_count(self)
475 }
476
477 fn logical_null_count(&self) -> usize {
478 T::logical_null_count(self)
479 }
480
481 fn is_nullable(&self) -> bool {
482 T::is_nullable(self)
483 }
484
485 fn get_buffer_memory_size(&self) -> usize {
486 T::get_buffer_memory_size(self)
487 }
488
489 fn get_array_memory_size(&self) -> usize {
490 T::get_array_memory_size(self)
491 }
492}
493
/// An [`Array`] whose elements can be read individually by index.
pub trait ArrayAccessor: Array {
    /// The type produced when a single element is read.
    type Item: Send + Sync;

    /// Returns the element at `index`.
    // NOTE(review): out-of-range behavior is left to each implementation (presumably a panic) — confirm.
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at `index` through an `unsafe` accessor.
    ///
    /// # Safety
    ///
    /// The contract is not spelled out at this declaration; presumably the
    /// caller must guarantee that `index` is within bounds — confirm against
    /// the implementations.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
588
/// Abstraction over array accessors whose elements are read as `&'a str`.
///
/// Implemented in this file for references to [`GenericStringArray`] and
/// [`StringViewArray`].
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Returns the result of the concrete array type's own `is_ascii` check.
    fn is_ascii(&self) -> bool;

    /// Returns an [`ArrayIter`] over this array.
    fn iter(&self) -> ArrayIter<Self>;
}
603
/// Forwards to the inherent `is_ascii` and `iter` methods of [`GenericStringArray`].
impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
    fn is_ascii(&self) -> bool {
        // Fully-qualified path selects the method on the array type, not this trait method.
        GenericStringArray::<O>::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        // Fully-qualified path selects the method on the array type, not this trait method.
        GenericStringArray::<O>::iter(self)
    }
}
/// Forwards to the `is_ascii` and `iter` methods of [`StringViewArray`].
impl<'a> StringArrayType<'a> for &'a StringViewArray {
    fn is_ascii(&self) -> bool {
        // Fully-qualified path selects the method on the array type, not this trait method.
        StringViewArray::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        // Fully-qualified path selects the method on the array type, not this trait method.
        StringViewArray::iter(self)
    }
}
622
/// Abstraction over array accessors whose elements are read as `&'a [u8]`.
///
/// Implemented in this file for references to [`GenericBinaryArray`] and
/// [`BinaryViewArray`].
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
    /// Returns an [`ArrayIter`] over this array.
    fn iter(&self) -> ArrayIter<Self>;
}
634
/// Forwards to the inherent `iter` method of [`GenericBinaryArray`].
impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
    fn iter(&self) -> ArrayIter<Self> {
        // Fully-qualified path selects the method on the array type, not this trait method.
        GenericBinaryArray::<O>::iter(self)
    }
}
/// Forwards to the `iter` method of [`BinaryViewArray`].
impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
    fn iter(&self) -> ArrayIter<Self> {
        // Fully-qualified path selects the method on the array type, not this trait method.
        BinaryViewArray::iter(self)
    }
}
645
646impl PartialEq for dyn Array + '_ {
647 fn eq(&self, other: &Self) -> bool {
648 self.to_data().eq(&other.to_data())
649 }
650}
651
652impl<T: Array> PartialEq<T> for dyn Array + '_ {
653 fn eq(&self, other: &T) -> bool {
654 self.to_data().eq(&other.to_data())
655 }
656}
657
658impl PartialEq for NullArray {
659 fn eq(&self, other: &NullArray) -> bool {
660 self.to_data().eq(&other.to_data())
661 }
662}
663
664impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
665 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
666 self.to_data().eq(&other.to_data())
667 }
668}
669
670impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
671 fn eq(&self, other: &Self) -> bool {
672 self.to_data().eq(&other.to_data())
673 }
674}
675
676impl PartialEq for BooleanArray {
677 fn eq(&self, other: &BooleanArray) -> bool {
678 self.to_data().eq(&other.to_data())
679 }
680}
681
682impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
683 fn eq(&self, other: &Self) -> bool {
684 self.to_data().eq(&other.to_data())
685 }
686}
687
688impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
689 fn eq(&self, other: &Self) -> bool {
690 self.to_data().eq(&other.to_data())
691 }
692}
693
694impl PartialEq for FixedSizeBinaryArray {
695 fn eq(&self, other: &Self) -> bool {
696 self.to_data().eq(&other.to_data())
697 }
698}
699
700impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
701 fn eq(&self, other: &Self) -> bool {
702 self.to_data().eq(&other.to_data())
703 }
704}
705
706impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
707 fn eq(&self, other: &Self) -> bool {
708 self.to_data().eq(&other.to_data())
709 }
710}
711
712impl PartialEq for MapArray {
713 fn eq(&self, other: &Self) -> bool {
714 self.to_data().eq(&other.to_data())
715 }
716}
717
718impl PartialEq for FixedSizeListArray {
719 fn eq(&self, other: &Self) -> bool {
720 self.to_data().eq(&other.to_data())
721 }
722}
723
724impl PartialEq for StructArray {
725 fn eq(&self, other: &Self) -> bool {
726 self.to_data().eq(&other.to_data())
727 }
728}
729
730impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
731 fn eq(&self, other: &Self) -> bool {
732 self.to_data().eq(&other.to_data())
733 }
734}
735
736impl<R: RunEndIndexType> PartialEq for RunArray<R> {
737 fn eq(&self, other: &Self) -> bool {
738 self.to_data().eq(&other.to_data())
739 }
740}
741
742pub fn make_array(data: ArrayData) -> ArrayRef {
745 match data.data_type() {
746 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
747 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
748 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
749 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
750 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
751 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
752 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
753 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
754 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
755 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
756 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
757 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
758 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
759 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
760 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
761 DataType::Time32(TimeUnit::Millisecond) => {
762 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
763 }
764 DataType::Time64(TimeUnit::Microsecond) => {
765 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
766 }
767 DataType::Time64(TimeUnit::Nanosecond) => {
768 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
769 }
770 DataType::Timestamp(TimeUnit::Second, _) => {
771 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
772 }
773 DataType::Timestamp(TimeUnit::Millisecond, _) => {
774 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
775 }
776 DataType::Timestamp(TimeUnit::Microsecond, _) => {
777 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
778 }
779 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
780 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
781 }
782 DataType::Interval(IntervalUnit::YearMonth) => {
783 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
784 }
785 DataType::Interval(IntervalUnit::DayTime) => {
786 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
787 }
788 DataType::Interval(IntervalUnit::MonthDayNano) => {
789 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
790 }
791 DataType::Duration(TimeUnit::Second) => {
792 Arc::new(DurationSecondArray::from(data)) as ArrayRef
793 }
794 DataType::Duration(TimeUnit::Millisecond) => {
795 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
796 }
797 DataType::Duration(TimeUnit::Microsecond) => {
798 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
799 }
800 DataType::Duration(TimeUnit::Nanosecond) => {
801 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
802 }
803 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
804 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
805 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
806 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
807 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
808 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
809 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
810 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
811 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
812 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
813 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
814 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
815 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
816 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
817 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
818 DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
819 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
820 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
821 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
822 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
823 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
824 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
825 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
826 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
827 dt => panic!("Unexpected dictionary key type {dt:?}"),
828 },
829 DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
830 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
831 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
832 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
833 dt => panic!("Unexpected data type for run_ends array {dt:?}"),
834 },
835 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
836 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
837 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
838 dt => panic!("Unexpected data type {dt:?}"),
839 }
840}
841
842pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
855 let data = ArrayData::new_empty(data_type);
856 make_array(data)
857}
858
859pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
873 make_array(ArrayData::new_null(data_type, length))
874}
875
876unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
882 match data.is_empty() && data.buffers()[0].is_empty() {
883 true => OffsetBuffer::new_empty(),
884 false => {
885 let buffer =
886 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
887 unsafe { OffsetBuffer::new_unchecked(buffer) }
890 }
891 }
892}
893
894fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
896where
897 A: Array,
898 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
899{
900 let head = std::cmp::min(10, array.len());
901
902 for i in 0..head {
903 if array.is_null(i) {
904 writeln!(f, " null,")?;
905 } else {
906 write!(f, " ")?;
907 print_item(array, i, f)?;
908 writeln!(f, ",")?;
909 }
910 }
911 if array.len() > 10 {
912 if array.len() > 20 {
913 writeln!(f, " ...{} elements...,", array.len() - 20)?;
914 }
915
916 let tail = std::cmp::max(head, array.len() - 10);
917
918 for i in tail..array.len() {
919 if array.is_null(i) {
920 writeln!(f, " null,")?;
921 } else {
922 write!(f, " ")?;
923 print_item(array, i, f)?;
924 writeln!(f, ",")?;
925 }
926 }
927 }
928 Ok(())
929}
930
// Unit tests for the array constructors (`new_empty_array`, `new_null_array`),
// the memory-size accessors, and the `Array` impls for references and `ArrayRef`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    // An empty Int32 array downcasts to `Int32Array` and holds no values.
    #[test]
    fn test_empty_primitive() {
        let array = new_empty_array(&DataType::Int32);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 0);
        let expected: &[i32] = &[];
        assert_eq!(a.values(), expected);
    }

    // An empty Utf8 array still exposes a first offset, and it is zero.
    #[test]
    fn test_empty_variable_sized() {
        let array = new_empty_array(&DataType::Utf8);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    // An empty list array still exposes a first offset, and it is zero.
    #[test]
    fn test_empty_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
        let array = new_empty_array(&data_type);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    // Every element of a null boolean array is null.
    #[test]
    fn test_null_boolean() {
        let array = new_null_array(&DataType::Boolean, 9);
        let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // Every element of a null Int32 array is null.
    #[test]
    fn test_null_primitive() {
        let array = new_null_array(&DataType::Int32, 9);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null struct array has a child column of the same length and is null at every index.
    #[test]
    fn test_null_struct() {
        let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
        let array = new_null_array(&struct_type, 9);

        let a = array.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.column(0).len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }

        // The result is discarded; this only checks that slicing the array does not panic.
        a.slice(0, 5);
    }

    // A null Utf8 array is null at every index and its last offset is zero.
    #[test]
    fn test_null_variable_sized() {
        let array = new_null_array(&DataType::Utf8, 9);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null list array is null at every index and its last offset is zero.
    #[test]
    fn test_null_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null map array is null at every index and its last offset is zero.
    #[test]
    fn test_null_map() {
        let data_type = DataType::Map(
            Arc::new(Field::new(
                "entry",
                DataType::Struct(Fields::from(vec![
                    Field::new("key", DataType::Utf8, false),
                    Field::new("value", DataType::Int32, true),
                ])),
                false,
            )),
            false,
        );
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null dictionary array equals one collected from nine `None` values,
    // and their first buffers have the same length.
    #[test]
    fn test_null_dictionary() {
        let values =
            vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;

        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
        let array = Arc::new(array) as ArrayRef;

        let null_array = new_null_array(array.data_type(), 9);
        assert_eq!(&array, &null_array);
        assert_eq!(
            array.to_data().buffers()[0].len(),
            null_array.to_data().buffers()[0].len()
        );
    }

    // For both union modes, a null union array reports no physical nulls but
    // four logical nulls, and each value read from it is a one-element null array.
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let data_type = DataType::Union(
                UnionFields::new(
                    vec![2, 1],
                    vec![
                        Field::new("foo", DataType::Int32, true),
                        Field::new("bar", DataType::Int64, true),
                    ],
                ),
                mode,
            );
            let array = new_null_array(&data_type, 4);

            let array = as_union_array(array.as_ref());
            assert_eq!(array.len(), 4);
            assert_eq!(array.null_count(), 0);
            assert_eq!(array.logical_null_count(), 4);

            for i in 0..4 {
                let a = array.value(i);
                assert_eq!(a.len(), 1);
                assert_eq!(a.null_count(), 1);
                assert_eq!(a.logical_null_count(), 1);
                assert!(a.is_null(0))
            }

            array.to_data().validate_full().unwrap();
        }
    }

    // For each run-end type, a null run array reports no physical nulls but
    // four logical nulls, backed by a single null value and the run end `4`.
    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for r in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let data_type = DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", r, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            );

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    // All four logical indices resolve to physical index 0.
                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    // For several element widths, a null fixed-size binary array is null everywhere.
    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let array = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let array = array
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(array.len(), 6);
            assert_eq!(array.null_count(), 6);
            assert_eq!(array.logical_null_count(), 6);
            array.iter().for_each(|x| assert!(x.is_none()));
        }
    }

    // A `NullArray` reports zero buffer memory and an array size equal to one `usize`.
    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());
        assert_eq!(
            std::mem::size_of::<usize>(),
            null_arr.get_array_memory_size()
        );
    }

    // The size difference between a 128-element and an empty Int64 array is
    // exactly the size of 128 `i64` values.
    #[test]
    fn test_memory_size_primitive() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));

        // Subtracting the empty array removes the fixed overhead from the comparison.
        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            128 * std::mem::size_of::<i64>()
        );
    }

    // Slices report the same array memory size as the array they were taken from.
    #[test]
    fn test_memory_size_primitive_sliced() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let slice1 = arr.slice(0, 64);
        let slice2 = arr.slice(64, 64);

        assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
        assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
    }

    // With a null bitmap, 128 Int64 elements add the value bytes plus 64 more bytes.
    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
            ArrayData::builder(arr.data_type().clone())
                .add_buffer(MutableBuffer::new(0).into())
                .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
                .build()
                .unwrap(),
        );

        // An empty array with an empty bitmap is exactly as large as the struct itself.
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // Subtracting the empty array removes the fixed overhead; the extra 64 is
        // presumably the null bitmap's allocation — confirm.
        assert_eq!(
            arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
            128 * std::mem::size_of::<i64>() + 64
        );
    }

    // A dictionary array's size is the sum of its keys' and values' sizes.
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        // Re-type the keys' data as a dictionary and attach the values as child data.
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_data = ArrayData::new_empty(&DataType::Dictionary(
            Box::new(DataType::Int16),
            Box::new(DataType::Int64),
        ));

        let arr = DictionaryArray::<Int16Type>::from(dict_data);
        let empty = DictionaryArray::<Int16Type>::from(empty_data);

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        assert_eq!(
            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
            expected_keys_size
        );

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        assert_eq!(
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size(),
            expected_values_size
        );

        let expected_size = expected_keys_size + expected_values_size;
        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            expected_size
        );
    }

    // Helper taking `&dyn Array`; used below to check which reference types coerce to it.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    // A concrete array, an `ArrayRef`, and the `dyn Array` inside it can all be
    // passed where `&dyn Array` is expected.
    #[test]
    fn test_array_ref_as_array() {
        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // Reference to a concrete array.
        assert!(compute_my_thing(&arr));

        let arr: ArrayRef = Arc::new(arr);
        // Reference to the `ArrayRef` itself, then to the array it points at.
        assert!(compute_my_thing(&arr));
        assert!(compute_my_thing(arr.as_ref()));
    }

    // `downcast_array` turns an `ArrayRef` back into an equal concrete `Int32Array`.
    #[test]
    fn test_downcast_array() {
        let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(array);
        let array: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(array, expected);
    }
}