1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81pub unsafe trait Array: std::fmt::Debug + Send + Sync {
101 fn as_any(&self) -> &dyn Any;
124
125 fn to_data(&self) -> ArrayData;
127
128 fn into_data(self) -> ArrayData;
132
133 fn data_type(&self) -> &DataType;
146
147 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
161
162 fn len(&self) -> usize;
174
175 fn is_empty(&self) -> bool;
187
188 fn shrink_to_fit(&mut self) {}
193
194 fn offset(&self) -> usize;
210
211 fn nulls(&self) -> Option<&NullBuffer>;
224
225 fn logical_nulls(&self) -> Option<NullBuffer> {
244 self.nulls().cloned()
245 }
246
247 fn is_null(&self, index: usize) -> bool {
269 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
270 }
271
272 fn is_valid(&self, index: usize) -> bool {
286 !self.is_null(index)
287 }
288
289 fn null_count(&self) -> usize {
305 self.nulls().map(|n| n.null_count()).unwrap_or_default()
306 }
307
308 fn logical_null_count(&self) -> usize {
325 self.logical_nulls()
326 .map(|n| n.null_count())
327 .unwrap_or_default()
328 }
329
330 fn is_nullable(&self) -> bool {
344 self.logical_null_count() != 0
345 }
346
347 fn get_buffer_memory_size(&self) -> usize;
352
353 fn get_array_memory_size(&self) -> usize;
357}
358
359pub type ArrayRef = Arc<dyn Array>;
361
362unsafe impl Array for ArrayRef {
364 fn as_any(&self) -> &dyn Any {
365 self.as_ref().as_any()
366 }
367
368 fn to_data(&self) -> ArrayData {
369 self.as_ref().to_data()
370 }
371
372 fn into_data(self) -> ArrayData {
373 self.to_data()
374 }
375
376 fn data_type(&self) -> &DataType {
377 self.as_ref().data_type()
378 }
379
380 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
381 self.as_ref().slice(offset, length)
382 }
383
384 fn len(&self) -> usize {
385 self.as_ref().len()
386 }
387
388 fn is_empty(&self) -> bool {
389 self.as_ref().is_empty()
390 }
391
392 fn shrink_to_fit(&mut self) {
394 if let Some(slf) = Arc::get_mut(self) {
395 slf.shrink_to_fit();
396 } else {
397 }
399 }
400
401 fn offset(&self) -> usize {
402 self.as_ref().offset()
403 }
404
405 fn nulls(&self) -> Option<&NullBuffer> {
406 self.as_ref().nulls()
407 }
408
409 fn logical_nulls(&self) -> Option<NullBuffer> {
410 self.as_ref().logical_nulls()
411 }
412
413 fn is_null(&self, index: usize) -> bool {
414 self.as_ref().is_null(index)
415 }
416
417 fn is_valid(&self, index: usize) -> bool {
418 self.as_ref().is_valid(index)
419 }
420
421 fn null_count(&self) -> usize {
422 self.as_ref().null_count()
423 }
424
425 fn logical_null_count(&self) -> usize {
426 self.as_ref().logical_null_count()
427 }
428
429 fn is_nullable(&self) -> bool {
430 self.as_ref().is_nullable()
431 }
432
433 fn get_buffer_memory_size(&self) -> usize {
434 self.as_ref().get_buffer_memory_size()
435 }
436
437 fn get_array_memory_size(&self) -> usize {
438 self.as_ref().get_array_memory_size()
439 }
440}
441
442unsafe impl<T: Array> Array for &T {
443 fn as_any(&self) -> &dyn Any {
444 T::as_any(self)
445 }
446
447 fn to_data(&self) -> ArrayData {
448 T::to_data(self)
449 }
450
451 fn into_data(self) -> ArrayData {
452 self.to_data()
453 }
454
455 fn data_type(&self) -> &DataType {
456 T::data_type(self)
457 }
458
459 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
460 T::slice(self, offset, length)
461 }
462
463 fn len(&self) -> usize {
464 T::len(self)
465 }
466
467 fn is_empty(&self) -> bool {
468 T::is_empty(self)
469 }
470
471 fn offset(&self) -> usize {
472 T::offset(self)
473 }
474
475 fn nulls(&self) -> Option<&NullBuffer> {
476 T::nulls(self)
477 }
478
479 fn logical_nulls(&self) -> Option<NullBuffer> {
480 T::logical_nulls(self)
481 }
482
483 fn is_null(&self, index: usize) -> bool {
484 T::is_null(self, index)
485 }
486
487 fn is_valid(&self, index: usize) -> bool {
488 T::is_valid(self, index)
489 }
490
491 fn null_count(&self) -> usize {
492 T::null_count(self)
493 }
494
495 fn logical_null_count(&self) -> usize {
496 T::logical_null_count(self)
497 }
498
499 fn is_nullable(&self) -> bool {
500 T::is_nullable(self)
501 }
502
503 fn get_buffer_memory_size(&self) -> usize {
504 T::get_buffer_memory_size(self)
505 }
506
507 fn get_array_memory_size(&self) -> usize {
508 T::get_array_memory_size(self)
509 }
510}
511
512pub trait ArrayAccessor: Array {
593 type Item: Send + Sync;
595
596 fn value(&self, index: usize) -> Self::Item;
600
601 unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
605}
606
607pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
615 fn is_ascii(&self) -> bool;
617
618 fn iter(&self) -> ArrayIter<Self>;
620}
621
622impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
623 fn is_ascii(&self) -> bool {
624 GenericStringArray::<O>::is_ascii(self)
625 }
626
627 fn iter(&self) -> ArrayIter<Self> {
628 GenericStringArray::<O>::iter(self)
629 }
630}
631impl<'a> StringArrayType<'a> for &'a StringViewArray {
632 fn is_ascii(&self) -> bool {
633 StringViewArray::is_ascii(self)
634 }
635
636 fn iter(&self) -> ArrayIter<Self> {
637 StringViewArray::iter(self)
638 }
639}
640
641pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
650 fn iter(&self) -> ArrayIter<Self>;
652}
653
654impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
655 fn iter(&self) -> ArrayIter<Self> {
656 GenericBinaryArray::<O>::iter(self)
657 }
658}
659impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
660 fn iter(&self) -> ArrayIter<Self> {
661 BinaryViewArray::iter(self)
662 }
663}
664impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
665 fn iter(&self) -> ArrayIter<Self> {
666 FixedSizeBinaryArray::iter(self)
667 }
668}
669
670impl PartialEq for dyn Array + '_ {
671 fn eq(&self, other: &Self) -> bool {
672 self.to_data().eq(&other.to_data())
673 }
674}
675
676impl<T: Array> PartialEq<T> for dyn Array + '_ {
677 fn eq(&self, other: &T) -> bool {
678 self.to_data().eq(&other.to_data())
679 }
680}
681
682impl PartialEq for NullArray {
683 fn eq(&self, other: &NullArray) -> bool {
684 self.to_data().eq(&other.to_data())
685 }
686}
687
688impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
689 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
690 self.to_data().eq(&other.to_data())
691 }
692}
693
694impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
695 fn eq(&self, other: &Self) -> bool {
696 self.to_data().eq(&other.to_data())
697 }
698}
699
700impl PartialEq for BooleanArray {
701 fn eq(&self, other: &BooleanArray) -> bool {
702 self.to_data().eq(&other.to_data())
703 }
704}
705
706impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
707 fn eq(&self, other: &Self) -> bool {
708 self.to_data().eq(&other.to_data())
709 }
710}
711
712impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
713 fn eq(&self, other: &Self) -> bool {
714 self.to_data().eq(&other.to_data())
715 }
716}
717
718impl PartialEq for FixedSizeBinaryArray {
719 fn eq(&self, other: &Self) -> bool {
720 self.to_data().eq(&other.to_data())
721 }
722}
723
724impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
725 fn eq(&self, other: &Self) -> bool {
726 self.to_data().eq(&other.to_data())
727 }
728}
729
730impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
731 fn eq(&self, other: &Self) -> bool {
732 self.to_data().eq(&other.to_data())
733 }
734}
735
736impl PartialEq for MapArray {
737 fn eq(&self, other: &Self) -> bool {
738 self.to_data().eq(&other.to_data())
739 }
740}
741
742impl PartialEq for FixedSizeListArray {
743 fn eq(&self, other: &Self) -> bool {
744 self.to_data().eq(&other.to_data())
745 }
746}
747
748impl PartialEq for StructArray {
749 fn eq(&self, other: &Self) -> bool {
750 self.to_data().eq(&other.to_data())
751 }
752}
753
754impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
755 fn eq(&self, other: &Self) -> bool {
756 self.to_data().eq(&other.to_data())
757 }
758}
759
760impl<R: RunEndIndexType> PartialEq for RunArray<R> {
761 fn eq(&self, other: &Self) -> bool {
762 self.to_data().eq(&other.to_data())
763 }
764}
765
766pub fn make_array(data: ArrayData) -> ArrayRef {
797 match data.data_type() {
798 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
799 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
800 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
801 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
802 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
803 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
804 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
805 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
806 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
807 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
808 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
809 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
810 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
811 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
812 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
813 DataType::Time32(TimeUnit::Millisecond) => {
814 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
815 }
816 DataType::Time64(TimeUnit::Microsecond) => {
817 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
818 }
819 DataType::Time64(TimeUnit::Nanosecond) => {
820 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
821 }
822 DataType::Timestamp(TimeUnit::Second, _) => {
823 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
824 }
825 DataType::Timestamp(TimeUnit::Millisecond, _) => {
826 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
827 }
828 DataType::Timestamp(TimeUnit::Microsecond, _) => {
829 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
830 }
831 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
832 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
833 }
834 DataType::Interval(IntervalUnit::YearMonth) => {
835 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
836 }
837 DataType::Interval(IntervalUnit::DayTime) => {
838 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
839 }
840 DataType::Interval(IntervalUnit::MonthDayNano) => {
841 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
842 }
843 DataType::Duration(TimeUnit::Second) => {
844 Arc::new(DurationSecondArray::from(data)) as ArrayRef
845 }
846 DataType::Duration(TimeUnit::Millisecond) => {
847 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
848 }
849 DataType::Duration(TimeUnit::Microsecond) => {
850 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
851 }
852 DataType::Duration(TimeUnit::Nanosecond) => {
853 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
854 }
855 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
856 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
857 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
858 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
859 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
860 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
861 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
862 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
863 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
864 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
865 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
866 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
867 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
868 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
869 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
870 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
871 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
872 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
873 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
874 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
875 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
876 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
877 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
878 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
879 dt => unimplemented!("Unexpected dictionary key type {dt}"),
880 },
881 DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
882 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
883 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
884 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
885 dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
886 },
887 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
888 DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
889 DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
890 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
891 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
892 dt => unimplemented!("Unexpected data type {dt}"),
893 }
894}
895
896pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
909 let data = ArrayData::new_empty(data_type);
910 make_array(data)
911}
912
913pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
927 make_array(ArrayData::new_null(data_type, length))
928}
929
930unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
937 buffer: Buffer,
938 offset: usize,
939 len: usize,
940) -> OffsetBuffer<O> {
941 if len == 0 && buffer.is_empty() {
942 return OffsetBuffer::new_empty();
943 }
944
945 let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
946 unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
949}
950
951fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
953where
954 A: Array,
955 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
956{
957 let head = std::cmp::min(10, array.len());
958
959 for i in 0..head {
960 if array.is_null(i) {
961 writeln!(f, " null,")?;
962 } else {
963 write!(f, " ")?;
964 print_item(array, i, f)?;
965 writeln!(f, ",")?;
966 }
967 }
968 if array.len() > 10 {
969 if array.len() > 20 {
970 writeln!(f, " ...{} elements...,", array.len() - 20)?;
971 }
972
973 let tail = std::cmp::max(head, array.len() - 10);
974
975 for i in tail..array.len() {
976 if array.is_null(i) {
977 writeln!(f, " null,")?;
978 } else {
979 write!(f, " ")?;
980 print_item(array, i, f)?;
981 writeln!(f, ",")?;
982 }
983 }
984 }
985 Ok(())
986}
987
988#[cfg(test)]
989mod tests {
990 use super::*;
991 use crate::cast::{as_union_array, downcast_array};
992 use crate::downcast_run_array;
993 use arrow_buffer::MutableBuffer;
994 use arrow_schema::{Field, Fields, UnionFields, UnionMode};
995
996 #[test]
997 fn test_empty_primitive() {
998 let array = new_empty_array(&DataType::Int32);
999 let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
1000 assert_eq!(a.len(), 0);
1001 let expected: &[i32] = &[];
1002 assert_eq!(a.values(), expected);
1003 }
1004
1005 #[test]
1006 fn test_empty_variable_sized() {
1007 let array = new_empty_array(&DataType::Utf8);
1008 let a = array.as_any().downcast_ref::<StringArray>().unwrap();
1009 assert_eq!(a.len(), 0);
1010 assert_eq!(a.value_offsets()[0], 0i32);
1011 }
1012
1013 #[test]
1014 fn test_empty_list_primitive() {
1015 let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1016 let array = new_empty_array(&data_type);
1017 let a = array.as_any().downcast_ref::<ListArray>().unwrap();
1018 assert_eq!(a.len(), 0);
1019 assert_eq!(a.value_offsets()[0], 0i32);
1020 }
1021
1022 #[test]
1023 fn test_null_boolean() {
1024 let array = new_null_array(&DataType::Boolean, 9);
1025 let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
1026 assert_eq!(a.len(), 9);
1027 for i in 0..9 {
1028 assert!(a.is_null(i));
1029 }
1030 }
1031
1032 #[test]
1033 fn test_null_primitive() {
1034 let array = new_null_array(&DataType::Int32, 9);
1035 let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
1036 assert_eq!(a.len(), 9);
1037 for i in 0..9 {
1038 assert!(a.is_null(i));
1039 }
1040 }
1041
1042 #[test]
1043 fn test_null_struct() {
1044 let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
1047 let array = new_null_array(&struct_type, 9);
1048
1049 let a = array.as_any().downcast_ref::<StructArray>().unwrap();
1050 assert_eq!(a.len(), 9);
1051 assert_eq!(a.column(0).len(), 9);
1052 for i in 0..9 {
1053 assert!(a.is_null(i));
1054 }
1055
1056 a.slice(0, 5);
1058 }
1059
1060 #[test]
1061 fn test_null_variable_sized() {
1062 let array = new_null_array(&DataType::Utf8, 9);
1063 let a = array.as_any().downcast_ref::<StringArray>().unwrap();
1064 assert_eq!(a.len(), 9);
1065 assert_eq!(a.value_offsets()[9], 0i32);
1066 for i in 0..9 {
1067 assert!(a.is_null(i));
1068 }
1069 }
1070
1071 #[test]
1072 fn test_null_list_primitive() {
1073 let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
1074 let array = new_null_array(&data_type, 9);
1075 let a = array.as_any().downcast_ref::<ListArray>().unwrap();
1076 assert_eq!(a.len(), 9);
1077 assert_eq!(a.value_offsets()[9], 0i32);
1078 for i in 0..9 {
1079 assert!(a.is_null(i));
1080 }
1081 }
1082
1083 #[test]
1084 fn test_null_map() {
1085 let data_type = DataType::Map(
1086 Arc::new(Field::new(
1087 "entry",
1088 DataType::Struct(Fields::from(vec![
1089 Field::new("key", DataType::Utf8, false),
1090 Field::new("value", DataType::Int32, true),
1091 ])),
1092 false,
1093 )),
1094 false,
1095 );
1096 let array = new_null_array(&data_type, 9);
1097 let a = array.as_any().downcast_ref::<MapArray>().unwrap();
1098 assert_eq!(a.len(), 9);
1099 assert_eq!(a.value_offsets()[9], 0i32);
1100 for i in 0..9 {
1101 assert!(a.is_null(i));
1102 }
1103 }
1104
1105 #[test]
1106 fn test_null_dictionary() {
1107 let values =
1108 vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;
1109
1110 let array: DictionaryArray<Int8Type> = values.into_iter().collect();
1111 let array = Arc::new(array) as ArrayRef;
1112
1113 let null_array = new_null_array(array.data_type(), 9);
1114 assert_eq!(&array, &null_array);
1115 assert_eq!(
1116 array.to_data().buffers()[0].len(),
1117 null_array.to_data().buffers()[0].len()
1118 );
1119 }
1120
1121 #[test]
1122 fn test_null_union() {
1123 for mode in [UnionMode::Sparse, UnionMode::Dense] {
1124 let data_type = DataType::Union(
1125 UnionFields::try_new(
1126 vec![2, 1],
1127 vec![
1128 Field::new("foo", DataType::Int32, true),
1129 Field::new("bar", DataType::Int64, true),
1130 ],
1131 )
1132 .unwrap(),
1133 mode,
1134 );
1135 let array = new_null_array(&data_type, 4);
1136
1137 let array = as_union_array(array.as_ref());
1138 assert_eq!(array.len(), 4);
1139 assert_eq!(array.null_count(), 0);
1140 assert_eq!(array.logical_null_count(), 4);
1141
1142 for i in 0..4 {
1143 let a = array.value(i);
1144 assert_eq!(a.len(), 1);
1145 assert_eq!(a.null_count(), 1);
1146 assert_eq!(a.logical_null_count(), 1);
1147 assert!(a.is_null(0))
1148 }
1149
1150 array.to_data().validate_full().unwrap();
1151 }
1152 }
1153
1154 #[test]
1155 #[allow(unused_parens)]
1156 fn test_null_runs() {
1157 for r in [DataType::Int16, DataType::Int32, DataType::Int64] {
1158 let data_type = DataType::RunEndEncoded(
1159 Arc::new(Field::new("run_ends", r, false)),
1160 Arc::new(Field::new("values", DataType::Utf8, true)),
1161 );
1162
1163 let array = new_null_array(&data_type, 4);
1164 let array = array.as_ref();
1165
1166 downcast_run_array! {
1167 array => {
1168 assert_eq!(array.len(), 4);
1169 assert_eq!(array.null_count(), 0);
1170 assert_eq!(array.logical_null_count(), 4);
1171 assert_eq!(array.values().len(), 1);
1172 assert_eq!(array.values().null_count(), 1);
1173 assert_eq!(array.run_ends().len(), 4);
1174 assert_eq!(array.run_ends().values(), &[4]);
1175
1176 let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
1177 assert_eq!(idx, &[0,0,0,0]);
1178 }
1179 d => unreachable!("{d}")
1180 }
1181 }
1182 }
1183
1184 #[test]
1185 fn test_null_fixed_size_binary() {
1186 for size in [1, 2, 7] {
1187 let array = new_null_array(&DataType::FixedSizeBinary(size), 6);
1188 let array = array
1189 .as_ref()
1190 .as_any()
1191 .downcast_ref::<FixedSizeBinaryArray>()
1192 .unwrap();
1193
1194 assert_eq!(array.len(), 6);
1195 assert_eq!(array.null_count(), 6);
1196 assert_eq!(array.logical_null_count(), 6);
1197 array.iter().for_each(|x| assert!(x.is_none()));
1198 }
1199 }
1200
1201 #[test]
1202 fn test_memory_size_null() {
1203 let null_arr = NullArray::new(32);
1204
1205 assert_eq!(0, null_arr.get_buffer_memory_size());
1206 assert_eq!(
1207 std::mem::size_of::<usize>(),
1208 null_arr.get_array_memory_size()
1209 );
1210 }
1211
1212 #[test]
1213 fn test_memory_size_primitive() {
1214 let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
1215 let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
1216
1217 assert_eq!(
1219 arr.get_array_memory_size() - empty.get_array_memory_size(),
1220 128 * std::mem::size_of::<i64>()
1221 );
1222 }
1223
1224 #[test]
1225 fn test_memory_size_primitive_sliced() {
1226 let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
1227 let slice1 = arr.slice(0, 64);
1228 let slice2 = arr.slice(64, 64);
1229
1230 assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
1232 assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
1233 }
1234
1235 #[test]
1236 fn test_memory_size_primitive_nullable() {
1237 let arr: PrimitiveArray<Int64Type> = (0..128)
1238 .map(|i| if i % 20 == 0 { Some(i) } else { None })
1239 .collect();
1240 let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
1241 ArrayData::builder(arr.data_type().clone())
1242 .add_buffer(MutableBuffer::new(0).into())
1243 .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
1244 .build()
1245 .unwrap(),
1246 );
1247
1248 assert_eq!(
1252 std::mem::size_of::<PrimitiveArray<Int64Type>>(),
1253 empty_with_bitmap.get_array_memory_size()
1254 );
1255
1256 assert_eq!(
1259 arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
1260 128 * std::mem::size_of::<i64>() + 64
1261 );
1262 }
1263
1264 #[test]
1265 fn test_memory_size_dictionary() {
1266 let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
1267 let keys = PrimitiveArray::<Int16Type>::from_iter_values(
1268 (0..256).map(|i| (i % values.len()) as i16),
1269 );
1270
1271 let dict_data_type = DataType::Dictionary(
1272 Box::new(keys.data_type().clone()),
1273 Box::new(values.data_type().clone()),
1274 );
1275 let dict_data = keys
1276 .into_data()
1277 .into_builder()
1278 .data_type(dict_data_type)
1279 .child_data(vec![values.into_data()])
1280 .build()
1281 .unwrap();
1282
1283 let empty_data = ArrayData::new_empty(&DataType::Dictionary(
1284 Box::new(DataType::Int16),
1285 Box::new(DataType::Int64),
1286 ));
1287
1288 let arr = DictionaryArray::<Int16Type>::from(dict_data);
1289 let empty = DictionaryArray::<Int16Type>::from(empty_data);
1290
1291 let expected_keys_size = 256 * std::mem::size_of::<i16>();
1292 assert_eq!(
1293 arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
1294 expected_keys_size
1295 );
1296
1297 let expected_values_size = 16 * std::mem::size_of::<i64>();
1298 assert_eq!(
1299 arr.values().get_array_memory_size() - empty.values().get_array_memory_size(),
1300 expected_values_size
1301 );
1302
1303 let expected_size = expected_keys_size + expected_values_size;
1304 assert_eq!(
1305 arr.get_array_memory_size() - empty.get_array_memory_size(),
1306 expected_size
1307 );
1308 }
1309
1310 fn compute_my_thing(arr: &dyn Array) -> bool {
1312 !arr.is_empty()
1313 }
1314
1315 #[test]
1316 fn test_array_ref_as_array() {
1317 let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1318
1319 assert!(compute_my_thing(&arr));
1321
1322 let arr: ArrayRef = Arc::new(arr);
1324 assert!(compute_my_thing(&arr));
1325 assert!(compute_my_thing(arr.as_ref()));
1326 }
1327
1328 #[test]
1329 fn test_downcast_array() {
1330 let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1331
1332 let boxed: ArrayRef = Arc::new(array);
1333 let array: Int32Array = downcast_array(&boxed);
1334
1335 let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1336 assert_eq!(array, expected);
1337 }
1338}