1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81pub unsafe trait Array: std::fmt::Debug + Send + Sync {
92 fn as_any(&self) -> &dyn Any;
115
116 fn to_data(&self) -> ArrayData;
118
119 fn into_data(self) -> ArrayData;
123
124 fn data_type(&self) -> &DataType;
137
138 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
152
153 fn len(&self) -> usize;
165
166 fn is_empty(&self) -> bool;
178
179 fn shrink_to_fit(&mut self) {}
184
185 fn offset(&self) -> usize;
201
202 fn nulls(&self) -> Option<&NullBuffer>;
215
216 fn logical_nulls(&self) -> Option<NullBuffer> {
235 self.nulls().cloned()
236 }
237
238 fn is_null(&self, index: usize) -> bool {
260 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
261 }
262
263 fn is_valid(&self, index: usize) -> bool {
277 !self.is_null(index)
278 }
279
280 fn null_count(&self) -> usize {
296 self.nulls().map(|n| n.null_count()).unwrap_or_default()
297 }
298
299 fn logical_null_count(&self) -> usize {
316 self.logical_nulls()
317 .map(|n| n.null_count())
318 .unwrap_or_default()
319 }
320
321 fn is_nullable(&self) -> bool {
335 self.logical_null_count() != 0
336 }
337
338 fn get_buffer_memory_size(&self) -> usize;
343
344 fn get_array_memory_size(&self) -> usize;
348}
349
350pub type ArrayRef = Arc<dyn Array>;
352
353unsafe impl Array for ArrayRef {
355 fn as_any(&self) -> &dyn Any {
356 self.as_ref().as_any()
357 }
358
359 fn to_data(&self) -> ArrayData {
360 self.as_ref().to_data()
361 }
362
363 fn into_data(self) -> ArrayData {
364 self.to_data()
365 }
366
367 fn data_type(&self) -> &DataType {
368 self.as_ref().data_type()
369 }
370
371 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
372 self.as_ref().slice(offset, length)
373 }
374
375 fn len(&self) -> usize {
376 self.as_ref().len()
377 }
378
379 fn is_empty(&self) -> bool {
380 self.as_ref().is_empty()
381 }
382
383 fn shrink_to_fit(&mut self) {
385 if let Some(slf) = Arc::get_mut(self) {
386 slf.shrink_to_fit();
387 } else {
388 }
390 }
391
392 fn offset(&self) -> usize {
393 self.as_ref().offset()
394 }
395
396 fn nulls(&self) -> Option<&NullBuffer> {
397 self.as_ref().nulls()
398 }
399
400 fn logical_nulls(&self) -> Option<NullBuffer> {
401 self.as_ref().logical_nulls()
402 }
403
404 fn is_null(&self, index: usize) -> bool {
405 self.as_ref().is_null(index)
406 }
407
408 fn is_valid(&self, index: usize) -> bool {
409 self.as_ref().is_valid(index)
410 }
411
412 fn null_count(&self) -> usize {
413 self.as_ref().null_count()
414 }
415
416 fn logical_null_count(&self) -> usize {
417 self.as_ref().logical_null_count()
418 }
419
420 fn is_nullable(&self) -> bool {
421 self.as_ref().is_nullable()
422 }
423
424 fn get_buffer_memory_size(&self) -> usize {
425 self.as_ref().get_buffer_memory_size()
426 }
427
428 fn get_array_memory_size(&self) -> usize {
429 self.as_ref().get_array_memory_size()
430 }
431}
432
433unsafe impl<T: Array> Array for &T {
434 fn as_any(&self) -> &dyn Any {
435 T::as_any(self)
436 }
437
438 fn to_data(&self) -> ArrayData {
439 T::to_data(self)
440 }
441
442 fn into_data(self) -> ArrayData {
443 self.to_data()
444 }
445
446 fn data_type(&self) -> &DataType {
447 T::data_type(self)
448 }
449
450 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
451 T::slice(self, offset, length)
452 }
453
454 fn len(&self) -> usize {
455 T::len(self)
456 }
457
458 fn is_empty(&self) -> bool {
459 T::is_empty(self)
460 }
461
462 fn offset(&self) -> usize {
463 T::offset(self)
464 }
465
466 fn nulls(&self) -> Option<&NullBuffer> {
467 T::nulls(self)
468 }
469
470 fn logical_nulls(&self) -> Option<NullBuffer> {
471 T::logical_nulls(self)
472 }
473
474 fn is_null(&self, index: usize) -> bool {
475 T::is_null(self, index)
476 }
477
478 fn is_valid(&self, index: usize) -> bool {
479 T::is_valid(self, index)
480 }
481
482 fn null_count(&self) -> usize {
483 T::null_count(self)
484 }
485
486 fn logical_null_count(&self) -> usize {
487 T::logical_null_count(self)
488 }
489
490 fn is_nullable(&self) -> bool {
491 T::is_nullable(self)
492 }
493
494 fn get_buffer_memory_size(&self) -> usize {
495 T::get_buffer_memory_size(self)
496 }
497
498 fn get_array_memory_size(&self) -> usize {
499 T::get_array_memory_size(self)
500 }
501}
502
/// An [`Array`] whose elements can be read individually by index.
pub trait ArrayAccessor: Array {
    /// The type produced when reading a single element.
    type Item: Send + Sync;

    /// Returns the element at `index`.
    // NOTE(review): presumably panics when `index` is out of bounds — confirm
    // against the implementations.
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at `index` through an `unsafe` accessor.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably the caller must guarantee that `index` is
    /// within bounds — confirm against the implementations.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
597
/// Abstraction over array accessors whose items are `&'a str`.
///
/// In this file it is implemented for references to [`GenericStringArray`]
/// and [`StringViewArray`].
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Reports whether the array is ASCII, as determined by the underlying
    /// array type's own `is_ascii`.
    fn is_ascii(&self) -> bool;

    /// Returns an [`ArrayIter`] over this accessor.
    fn iter(&self) -> ArrayIter<Self>;
}
612
613impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
614 fn is_ascii(&self) -> bool {
615 GenericStringArray::<O>::is_ascii(self)
616 }
617
618 fn iter(&self) -> ArrayIter<Self> {
619 GenericStringArray::<O>::iter(self)
620 }
621}
622impl<'a> StringArrayType<'a> for &'a StringViewArray {
623 fn is_ascii(&self) -> bool {
624 StringViewArray::is_ascii(self)
625 }
626
627 fn iter(&self) -> ArrayIter<Self> {
628 StringViewArray::iter(self)
629 }
630}
631
/// Abstraction over array accessors whose items are `&'a [u8]`.
///
/// In this file it is implemented for references to [`GenericBinaryArray`],
/// [`BinaryViewArray`] and [`FixedSizeBinaryArray`].
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
    /// Returns an [`ArrayIter`] over this accessor.
    fn iter(&self) -> ArrayIter<Self>;
}
644
645impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
646 fn iter(&self) -> ArrayIter<Self> {
647 GenericBinaryArray::<O>::iter(self)
648 }
649}
650impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
651 fn iter(&self) -> ArrayIter<Self> {
652 BinaryViewArray::iter(self)
653 }
654}
655impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
656 fn iter(&self) -> ArrayIter<Self> {
657 FixedSizeBinaryArray::iter(self)
658 }
659}
660
661impl PartialEq for dyn Array + '_ {
662 fn eq(&self, other: &Self) -> bool {
663 self.to_data().eq(&other.to_data())
664 }
665}
666
667impl<T: Array> PartialEq<T> for dyn Array + '_ {
668 fn eq(&self, other: &T) -> bool {
669 self.to_data().eq(&other.to_data())
670 }
671}
672
673impl PartialEq for NullArray {
674 fn eq(&self, other: &NullArray) -> bool {
675 self.to_data().eq(&other.to_data())
676 }
677}
678
679impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
680 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
681 self.to_data().eq(&other.to_data())
682 }
683}
684
685impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
686 fn eq(&self, other: &Self) -> bool {
687 self.to_data().eq(&other.to_data())
688 }
689}
690
691impl PartialEq for BooleanArray {
692 fn eq(&self, other: &BooleanArray) -> bool {
693 self.to_data().eq(&other.to_data())
694 }
695}
696
697impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
698 fn eq(&self, other: &Self) -> bool {
699 self.to_data().eq(&other.to_data())
700 }
701}
702
703impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
704 fn eq(&self, other: &Self) -> bool {
705 self.to_data().eq(&other.to_data())
706 }
707}
708
709impl PartialEq for FixedSizeBinaryArray {
710 fn eq(&self, other: &Self) -> bool {
711 self.to_data().eq(&other.to_data())
712 }
713}
714
715impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
716 fn eq(&self, other: &Self) -> bool {
717 self.to_data().eq(&other.to_data())
718 }
719}
720
721impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
722 fn eq(&self, other: &Self) -> bool {
723 self.to_data().eq(&other.to_data())
724 }
725}
726
727impl PartialEq for MapArray {
728 fn eq(&self, other: &Self) -> bool {
729 self.to_data().eq(&other.to_data())
730 }
731}
732
733impl PartialEq for FixedSizeListArray {
734 fn eq(&self, other: &Self) -> bool {
735 self.to_data().eq(&other.to_data())
736 }
737}
738
739impl PartialEq for StructArray {
740 fn eq(&self, other: &Self) -> bool {
741 self.to_data().eq(&other.to_data())
742 }
743}
744
745impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
746 fn eq(&self, other: &Self) -> bool {
747 self.to_data().eq(&other.to_data())
748 }
749}
750
751impl<R: RunEndIndexType> PartialEq for RunArray<R> {
752 fn eq(&self, other: &Self) -> bool {
753 self.to_data().eq(&other.to_data())
754 }
755}
756
757pub fn make_array(data: ArrayData) -> ArrayRef {
788 match data.data_type() {
789 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
790 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
791 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
792 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
793 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
794 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
795 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
796 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
797 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
798 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
799 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
800 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
801 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
802 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
803 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
804 DataType::Time32(TimeUnit::Millisecond) => {
805 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
806 }
807 DataType::Time64(TimeUnit::Microsecond) => {
808 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
809 }
810 DataType::Time64(TimeUnit::Nanosecond) => {
811 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
812 }
813 DataType::Timestamp(TimeUnit::Second, _) => {
814 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
815 }
816 DataType::Timestamp(TimeUnit::Millisecond, _) => {
817 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
818 }
819 DataType::Timestamp(TimeUnit::Microsecond, _) => {
820 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
821 }
822 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
823 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
824 }
825 DataType::Interval(IntervalUnit::YearMonth) => {
826 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
827 }
828 DataType::Interval(IntervalUnit::DayTime) => {
829 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
830 }
831 DataType::Interval(IntervalUnit::MonthDayNano) => {
832 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
833 }
834 DataType::Duration(TimeUnit::Second) => {
835 Arc::new(DurationSecondArray::from(data)) as ArrayRef
836 }
837 DataType::Duration(TimeUnit::Millisecond) => {
838 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
839 }
840 DataType::Duration(TimeUnit::Microsecond) => {
841 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
842 }
843 DataType::Duration(TimeUnit::Nanosecond) => {
844 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
845 }
846 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
847 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
848 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
849 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
850 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
851 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
852 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
853 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
854 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
855 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
856 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
857 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
858 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
859 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
860 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
861 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
862 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
863 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
864 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
865 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
866 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
867 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
868 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
869 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
870 dt => unimplemented!("Unexpected dictionary key type {dt}"),
871 },
872 DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
873 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
874 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
875 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
876 dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
877 },
878 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
879 DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
880 DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
881 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
882 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
883 dt => unimplemented!("Unexpected data type {dt}"),
884 }
885}
886
887pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
900 let data = ArrayData::new_empty(data_type);
901 make_array(data)
902}
903
904pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
918 make_array(ArrayData::new_null(data_type, length))
919}
920
921unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
928 buffer: Buffer,
929 offset: usize,
930 len: usize,
931) -> OffsetBuffer<O> {
932 if len == 0 && buffer.is_empty() {
933 return OffsetBuffer::new_empty();
934 }
935
936 let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
937 unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
940}
941
942fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
944where
945 A: Array,
946 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
947{
948 let head = std::cmp::min(10, array.len());
949
950 for i in 0..head {
951 if array.is_null(i) {
952 writeln!(f, " null,")?;
953 } else {
954 write!(f, " ")?;
955 print_item(array, i, f)?;
956 writeln!(f, ",")?;
957 }
958 }
959 if array.len() > 10 {
960 if array.len() > 20 {
961 writeln!(f, " ...{} elements...,", array.len() - 20)?;
962 }
963
964 let tail = std::cmp::max(head, array.len() - 10);
965
966 for i in tail..array.len() {
967 if array.is_null(i) {
968 writeln!(f, " null,")?;
969 } else {
970 write!(f, " ")?;
971 print_item(array, i, f)?;
972 writeln!(f, ",")?;
973 }
974 }
975 }
976 Ok(())
977}
978
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// An empty Int32 array has no elements and an empty values slice.
    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = empty.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 0);
        let no_values: &[i32] = &[];
        assert_eq!(ints.values(), no_values);
    }

    /// An empty Utf8 array still exposes a first offset of zero.
    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = empty.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets()[0], 0i32);
    }

    /// An empty list array still exposes a first offset of zero.
    #[test]
    fn test_empty_list_primitive() {
        let item = Arc::new(Field::new_list_field(DataType::Int32, false));
        let empty = new_empty_array(&DataType::List(item));
        let lists = empty.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets()[0], 0i32);
    }

    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(bools.len(), 9);
        (0..9).for_each(|i| assert!(bools.is_null(i)));
    }

    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = nulls.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 9);
        (0..9).for_each(|i| assert!(ints.is_null(i)));
    }

    #[test]
    fn test_null_struct() {
        let fields = vec![Field::new("data", DataType::Int64, false)];
        let struct_type = DataType::Struct(fields.into());
        let nulls = new_null_array(&struct_type, 9);

        let structs = nulls.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        (0..9).for_each(|i| assert!(structs.is_null(i)));

        // Slicing the all-null struct array must not panic.
        structs.slice(0, 5);
    }

    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = nulls.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        (0..9).for_each(|i| assert!(strings.is_null(i)));
    }

    #[test]
    fn test_null_list_primitive() {
        let item = Arc::new(Field::new_list_field(DataType::Int32, true));
        let nulls = new_null_array(&DataType::List(item), 9);
        let lists = nulls.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        (0..9).for_each(|i| assert!(lists.is_null(i)));
    }

    #[test]
    fn test_null_map() {
        let entry_fields = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ]);
        let entry = Field::new("entry", DataType::Struct(entry_fields), false);
        let map_type = DataType::Map(Arc::new(entry), false);

        let nulls = new_null_array(&map_type, 9);
        let maps = nulls.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        (0..9).for_each(|i| assert!(maps.is_null(i)));
    }

    /// A dictionary collected from nine `None`s equals the null array created
    /// for the same data type, including the length of the first buffer.
    #[test]
    fn test_null_dictionary() {
        let values: Vec<Option<&str>> = vec![None; 9];

        let collected: DictionaryArray<Int8Type> = values.into_iter().collect();
        let collected = Arc::new(collected) as ArrayRef;

        let created = new_null_array(collected.data_type(), 9);
        assert_eq!(&collected, &created);
        assert_eq!(
            collected.to_data().buffers()[0].len(),
            created.to_data().buffers()[0].len()
        );
    }

    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let union_fields = UnionFields::try_new(
                vec![2, 1],
                vec![
                    Field::new("foo", DataType::Int32, true),
                    Field::new("bar", DataType::Int64, true),
                ],
            )
            .unwrap();
            let union_type = DataType::Union(union_fields, mode);
            let nulls = new_null_array(&union_type, 4);

            let union = as_union_array(nulls.as_ref());
            assert_eq!(union.len(), 4);
            // The union reports no physical nulls, only logical ones.
            assert_eq!(union.null_count(), 0);
            assert_eq!(union.logical_null_count(), 4);

            for i in 0..4 {
                let slot = union.value(i);
                assert_eq!(slot.len(), 1);
                assert_eq!(slot.null_count(), 1);
                assert_eq!(slot.logical_null_count(), 1);
                assert!(slot.is_null(0))
            }

            union.to_data().validate_full().unwrap();
        }
    }

    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_ends = Arc::new(Field::new("run_ends", run_end_type, false));
            let values = Arc::new(Field::new("values", DataType::Utf8, true));
            let data_type = DataType::RunEndEncoded(run_ends, values);

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let binary = nulls
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            for item in binary.iter() {
                assert!(item.is_none());
            }
        }
    }

    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());
        let expected_array_size = std::mem::size_of::<usize>();
        assert_eq!(expected_array_size, null_arr.get_array_memory_size());
    }

    #[test]
    fn test_memory_size_primitive() {
        let full = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(full.data_type()));

        // The difference to an empty array is exactly the values buffer.
        let growth = full.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(growth, 128 * std::mem::size_of::<i64>());
    }

    #[test]
    fn test_memory_size_primitive_sliced() {
        let whole = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let whole_size = whole.get_array_memory_size();
        let first_half = whole.slice(0, 64);
        let second_half = whole.slice(64, 64);

        // Slices report the same memory size as the array they came from.
        assert_eq!(first_half.get_array_memory_size(), whole_size);
        assert_eq!(second_half.get_array_memory_size(), whole_size);
    }

    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> =
            (0..128).map(|i| (i % 20 == 0).then_some(i)).collect();
        let empty_data = ArrayData::builder(arr.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(empty_data);

        // With empty buffers only the struct itself is counted.
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // Values buffer plus 64 additional bytes for the validity bitmap.
        let growth = arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size();
        assert_eq!(growth, 128 * std::mem::size_of::<i64>() + 64);
    }

    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();
        let arr = DictionaryArray::<Int16Type>::from(dict_data);

        let empty_type =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty = DictionaryArray::<Int16Type>::from(ArrayData::new_empty(&empty_type));

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        let keys_growth =
            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size();
        assert_eq!(keys_growth, expected_keys_size);

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        let values_growth =
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size();
        assert_eq!(values_growth, expected_values_size);

        let total_growth = arr.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(total_growth, expected_keys_size + expected_values_size);
    }

    /// Stand-in for a function that takes any array as `&dyn Array`.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        let empty = arr.is_empty();
        !empty
    }

    /// A concrete array, an `ArrayRef`, and the array behind an `ArrayRef`
    /// can all be passed where `&dyn Array` is expected.
    #[test]
    fn test_array_ref_as_array() {
        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // Concrete array by reference.
        assert!(compute_my_thing(&arr));

        let arr: ArrayRef = Arc::new(arr);
        // `&ArrayRef` works because `ArrayRef` itself implements `Array`.
        assert!(compute_my_thing(&arr));
        assert!(compute_my_thing(arr.as_ref()));
    }

    #[test]
    fn test_downcast_array() {
        let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(original);
        let downcast: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(downcast, expected);
    }
}