1use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
19use crate::cast::AsArray;
20use crate::iterator::ArrayIter;
21use crate::types::*;
22use crate::{
23 Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar, StringArray,
24 make_array,
25};
26use arrow_buffer::bit_util::set_bit;
27use arrow_buffer::buffer::NullBuffer;
28use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder, ScalarBuffer};
29use arrow_data::ArrayData;
30use arrow_schema::{ArrowError, DataType};
31use std::any::Any;
32use std::sync::Arc;
33
/// A [`DictionaryArray`] keyed by [`Int8Type`].
pub type Int8DictionaryArray = DictionaryArray<Int8Type>;

/// A [`DictionaryArray`] keyed by [`Int16Type`].
pub type Int16DictionaryArray = DictionaryArray<Int16Type>;

/// A [`DictionaryArray`] keyed by [`Int32Type`].
pub type Int32DictionaryArray = DictionaryArray<Int32Type>;

/// A [`DictionaryArray`] keyed by [`Int64Type`].
pub type Int64DictionaryArray = DictionaryArray<Int64Type>;

/// A [`DictionaryArray`] keyed by [`UInt8Type`].
pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;

/// A [`DictionaryArray`] keyed by [`UInt16Type`].
pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;

/// A [`DictionaryArray`] keyed by [`UInt32Type`].
pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;

/// A [`DictionaryArray`] keyed by [`UInt64Type`].
pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
161
/// A dictionary-encoded array: a [`PrimitiveArray`] of keys, each of which
/// indexes into a shared array of values.
///
/// The validating constructor in this file (`try_new`) rejects any non-null
/// key that is negative or not less than `values.len()`.
pub struct DictionaryArray<K: ArrowDictionaryKeyType> {
    /// The `DataType::Dictionary(key type, value type)` describing this array.
    data_type: DataType,

    /// The keys; a null key marks a null slot, a non-null key is an index
    /// into `values`.
    keys: PrimitiveArray<K>,

    /// The dictionary values referenced by `keys`.
    values: ArrayRef,

    /// Flag returned by `is_ordered()`. Every constructor visible in this
    /// file initialises it to `false`.
    is_ordered: bool,
}
258
259impl<K: ArrowDictionaryKeyType> Clone for DictionaryArray<K> {
260 fn clone(&self) -> Self {
261 Self {
262 data_type: self.data_type.clone(),
263 keys: self.keys.clone(),
264 values: self.values.clone(),
265 is_ordered: self.is_ordered,
266 }
267 }
268}
269
270impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
271 pub fn new(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
279 Self::try_new(keys, values).unwrap()
280 }
281
282 pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
290 let data_type = DataType::Dictionary(
291 Box::new(keys.data_type().clone()),
292 Box::new(values.data_type().clone()),
293 );
294
295 let all_null = keys.null_count() == keys.len();
297
298 if !all_null {
299 let zero = K::Native::usize_as(0);
300 let values_len = values.len();
301
302 if let Some((idx, v)) = keys.values().iter().enumerate().find(|(idx, v)| {
303 (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
304 }) {
305 return Err(ArrowError::InvalidArgumentError(format!(
306 "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
307 )));
308 }
309 }
310
311 Ok(Self {
312 data_type,
313 keys,
314 values,
315 is_ordered: false,
316 })
317 }
318
319 pub fn new_scalar<T: Array + 'static>(value: Scalar<T>) -> Scalar<Self> {
321 Scalar::new(Self::new(
322 PrimitiveArray::new(vec![K::Native::usize_as(0)].into(), None),
323 Arc::new(value.into_inner()),
324 ))
325 }
326
327 pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
333 if cfg!(feature = "force_validate") {
334 return Self::new(keys, values);
335 }
336
337 let data_type = DataType::Dictionary(
338 Box::new(keys.data_type().clone()),
339 Box::new(values.data_type().clone()),
340 );
341
342 Self {
343 data_type,
344 keys,
345 values,
346 is_ordered: false,
347 }
348 }
349
350 pub fn into_parts(self) -> (PrimitiveArray<K>, ArrayRef) {
352 (self.keys, self.values)
353 }
354
355 pub fn keys(&self) -> &PrimitiveArray<K> {
357 &self.keys
358 }
359
360 pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
366 let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
367
368 (0..rd_buf.len())
369 .position(|i| rd_buf.value(i) == value)
370 .and_then(K::Native::from_usize)
371 }
372
373 pub fn values(&self) -> &ArrayRef {
375 &self.values
376 }
377
378 pub fn value_type(&self) -> DataType {
380 self.values.data_type().clone()
381 }
382
383 pub fn len(&self) -> usize {
385 self.keys.len()
386 }
387
388 pub fn is_empty(&self) -> bool {
390 self.keys.is_empty()
391 }
392
393 pub fn is_ordered(&self) -> bool {
395 self.is_ordered
396 }
397
398 pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
400 self.keys.iter().map(|key| key.map(|k| k.as_usize()))
401 }
402
403 pub fn key(&self, i: usize) -> Option<usize> {
406 self.keys.is_valid(i).then(|| self.keys.value(i).as_usize())
407 }
408
409 pub fn slice(&self, offset: usize, length: usize) -> Self {
411 Self {
412 data_type: self.data_type.clone(),
413 keys: self.keys.slice(offset, length),
414 values: self.values.clone(),
415 is_ordered: self.is_ordered,
416 }
417 }
418
419 pub fn downcast_dict<V: 'static>(&self) -> Option<TypedDictionaryArray<'_, K, V>> {
433 let values = self.values.as_any().downcast_ref()?;
434 Some(TypedDictionaryArray {
435 dictionary: self,
436 values,
437 })
438 }
439
440 pub fn with_values(&self, values: ArrayRef) -> Self {
478 assert!(values.len() >= self.values.len());
479 let data_type =
480 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
481 Self {
482 data_type,
483 keys: self.keys.clone(),
484 values,
485 is_ordered: false,
486 }
487 }
488
489 #[allow(clippy::result_large_err)]
492 pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
493 where
494 V: ArrowPrimitiveType,
495 {
496 if !self.value_type().is_primitive() {
497 return Err(self);
498 }
499
500 let key_array = self.keys().clone();
501 let value_array = self.values().as_primitive::<V>().clone();
502
503 drop(self.keys);
504 drop(self.values);
505
506 let key_builder = key_array.into_builder();
507 let value_builder = value_array.into_builder();
508
509 match (key_builder, value_builder) {
510 (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
511 PrimitiveDictionaryBuilder::new_from_builders(key_builder, value_builder)
512 }),
513 (Err(key_array), Ok(mut value_builder)) => {
514 Err(Self::try_new(key_array, Arc::new(value_builder.finish())).unwrap())
515 }
516 (Ok(mut key_builder), Err(value_array)) => {
517 Err(Self::try_new(key_builder.finish(), Arc::new(value_array)).unwrap())
518 }
519 (Err(key_array), Err(value_array)) => {
520 Err(Self::try_new(key_array, Arc::new(value_array)).unwrap())
521 }
522 }
523 }
524
525 #[allow(clippy::result_large_err)]
549 pub fn unary_mut<F, V>(self, op: F) -> Result<DictionaryArray<K>, DictionaryArray<K>>
550 where
551 V: ArrowPrimitiveType,
552 F: Fn(V::Native) -> V::Native,
553 {
554 let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
555 builder
556 .values_slice_mut()
557 .iter_mut()
558 .for_each(|v| *v = op(*v));
559 Ok(builder.finish())
560 }
561
562 pub fn occupancy(&self) -> BooleanBuffer {
567 let len = self.values.len();
568 let mut builder = BooleanBufferBuilder::new(len);
569 builder.resize(len);
570 let slice = builder.as_slice_mut();
571 match self.keys.nulls().filter(|n| n.null_count() > 0) {
572 Some(n) => {
573 let v = self.keys.values();
574 n.valid_indices()
575 .for_each(|idx| set_bit(slice, v[idx].as_usize()))
576 }
577 None => {
578 let v = self.keys.values();
579 v.iter().for_each(|v| set_bit(slice, v.as_usize()))
580 }
581 }
582 builder.finish()
583 }
584}
585
586impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
588 fn from(data: ArrayData) -> Self {
589 let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
590
591 assert_eq!(
592 buffers.len(),
593 1,
594 "DictionaryArray data should contain a single buffer only (keys)."
595 );
596 let buffer = buffers.pop().expect("checked above");
597 assert_eq!(
598 child_data.len(),
599 1,
600 "DictionaryArray should contain a single child array (values)."
601 );
602 let cd = child_data.pop().expect("checked above");
603
604 if let DataType::Dictionary(key_data_type, _) = &data_type {
605 assert_eq!(
606 &T::DATA_TYPE,
607 key_data_type.as_ref(),
608 "DictionaryArray's data type must match, expected {} got {}",
609 T::DATA_TYPE,
610 key_data_type
611 );
612
613 let values = make_array(cd);
614
615 let keys = PrimitiveArray::<T>::new(ScalarBuffer::new(buffer, offset, len), nulls);
617
618 Self {
619 data_type,
620 keys,
621 values,
622 is_ordered: false,
623 }
624 } else {
625 panic!("DictionaryArray must have Dictionary data type.")
626 }
627 }
628}
629
630impl<T: ArrowDictionaryKeyType> From<DictionaryArray<T>> for ArrayData {
631 fn from(array: DictionaryArray<T>) -> Self {
632 let builder = array
633 .keys
634 .into_data()
635 .into_builder()
636 .data_type(array.data_type)
637 .child_data(vec![array.values.to_data()]);
638
639 unsafe { builder.build_unchecked() }
640 }
641}
642
643impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for DictionaryArray<T> {
660 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
661 let it = iter.into_iter();
662 let (lower, _) = it.size_hint();
663 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
664 builder.extend(it);
665 builder.finish()
666 }
667}
668
669impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T> {
684 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
685 let it = iter.into_iter();
686 let (lower, _) = it.size_hint();
687 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
688 it.for_each(|i| {
689 builder
690 .append(i)
691 .expect("Unable to append a value to a dictionary array.");
692 });
693
694 builder.finish()
695 }
696}
697
698unsafe impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
700 fn as_any(&self) -> &dyn Any {
701 self
702 }
703
704 fn to_data(&self) -> ArrayData {
705 self.clone().into()
706 }
707
708 fn into_data(self) -> ArrayData {
709 self.into()
710 }
711
712 fn data_type(&self) -> &DataType {
713 &self.data_type
714 }
715
716 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
717 Arc::new(self.slice(offset, length))
718 }
719
720 fn len(&self) -> usize {
721 self.keys.len()
722 }
723
724 fn is_empty(&self) -> bool {
725 self.keys.is_empty()
726 }
727
728 fn shrink_to_fit(&mut self) {
729 self.keys.shrink_to_fit();
730 self.values.shrink_to_fit();
731 }
732
733 fn offset(&self) -> usize {
734 self.keys.offset()
735 }
736
737 fn nulls(&self) -> Option<&NullBuffer> {
738 self.keys.nulls()
739 }
740
741 fn logical_nulls(&self) -> Option<NullBuffer> {
742 match self.values.logical_nulls() {
743 None => self.nulls().cloned(),
744 Some(value_nulls) => {
745 let mut builder = BooleanBufferBuilder::new(self.len());
746 match self.keys.nulls() {
747 Some(n) => builder.append_buffer(n.inner()),
748 None => builder.append_n(self.len(), true),
749 }
750 for (idx, k) in self.keys.values().iter().enumerate() {
751 let k = k.as_usize();
752 if k < value_nulls.len() && value_nulls.is_null(k) {
754 builder.set_bit(idx, false);
755 }
756 }
757 Some(builder.finish().into())
758 }
759 }
760 }
761
762 fn logical_null_count(&self) -> usize {
763 match (self.keys.nulls(), self.values.logical_nulls()) {
764 (None, None) => 0,
765 (Some(key_nulls), None) => key_nulls.null_count(),
766 (None, Some(value_nulls)) => self
767 .keys
768 .values()
769 .iter()
770 .filter(|k| value_nulls.is_null(k.as_usize()))
771 .count(),
772 (Some(key_nulls), Some(value_nulls)) => self
773 .keys
774 .values()
775 .iter()
776 .enumerate()
777 .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize()))
778 .count(),
779 }
780 }
781
782 fn is_nullable(&self) -> bool {
783 !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable())
784 }
785
786 fn get_buffer_memory_size(&self) -> usize {
787 self.keys.get_buffer_memory_size() + self.values.get_buffer_memory_size()
788 }
789
790 fn get_array_memory_size(&self) -> usize {
791 std::mem::size_of::<Self>()
792 + self.keys.get_buffer_memory_size()
793 + self.values.get_array_memory_size()
794 }
795
796 #[cfg(feature = "pool")]
797 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
798 self.keys.claim(pool);
799 self.values.claim(pool);
800 }
801}
802
803impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
804 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
805 writeln!(
806 f,
807 "DictionaryArray {{keys: {:?} values: {:?}}}",
808 self.keys, self.values
809 )
810 }
811}
812
/// A borrowed view of a [`DictionaryArray`] together with its values
/// downcast to the concrete type `V`.
///
/// Created by `DictionaryArray::downcast_dict`.
pub struct TypedDictionaryArray<'a, K: ArrowDictionaryKeyType, V> {
    /// The dictionary array this view borrows from.
    dictionary: &'a DictionaryArray<K>,
    /// The dictionary's values, downcast to `V`.
    values: &'a V,
}
836
// Implemented by hand so that `V` itself does not need to be `Clone`: the
// view only holds references.
impl<K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'_, K, V> {
    /// Copies the view (both fields are references).
    fn clone(&self) -> Self {
        *self
    }
}
843
// The view holds only shared references, so it is `Copy` regardless of `V`.
impl<K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'_, K, V> {}
845
846impl<K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'_, K, V> {
847 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
848 writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
849 }
850}
851
impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
    /// Returns the keys of the underlying dictionary array.
    pub fn keys(&self) -> &'a PrimitiveArray<K> {
        self.dictionary.keys()
    }

    /// Returns the dictionary values, already downcast to `V`.
    pub fn values(&self) -> &'a V {
        self.values
    }
}
863
// Every method forwards to the wrapped `DictionaryArray`.
unsafe impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
    /// Returns the wrapped dictionary array (not the typed view) as `Any`.
    fn as_any(&self) -> &dyn Any {
        self.dictionary
    }

    /// Returns the wrapped dictionary's data.
    fn to_data(&self) -> ArrayData {
        self.dictionary.to_data()
    }

    /// Returns the wrapped dictionary's data; the dictionary is only
    /// borrowed, so it is not consumed.
    fn into_data(self) -> ArrayData {
        self.dictionary.into_data()
    }

    /// Returns the wrapped dictionary's data type.
    fn data_type(&self) -> &DataType {
        self.dictionary.data_type()
    }

    /// Slices the wrapped dictionary; the result is an untyped dictionary
    /// array behind an `ArrayRef`.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.dictionary.slice(offset, length))
    }

    /// Length of the wrapped dictionary.
    fn len(&self) -> usize {
        self.dictionary.len()
    }

    /// Whether the wrapped dictionary is empty.
    fn is_empty(&self) -> bool {
        self.dictionary.is_empty()
    }

    /// Offset of the wrapped dictionary.
    fn offset(&self) -> usize {
        self.dictionary.offset()
    }

    /// Physical nulls of the wrapped dictionary.
    fn nulls(&self) -> Option<&NullBuffer> {
        self.dictionary.nulls()
    }

    /// Logical nulls of the wrapped dictionary.
    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.dictionary.logical_nulls()
    }

    /// Logical null count of the wrapped dictionary.
    fn logical_null_count(&self) -> usize {
        self.dictionary.logical_null_count()
    }

    /// Whether the wrapped dictionary is nullable.
    fn is_nullable(&self) -> bool {
        self.dictionary.is_nullable()
    }

    /// Buffer memory size of the wrapped dictionary.
    fn get_buffer_memory_size(&self) -> usize {
        self.dictionary.get_buffer_memory_size()
    }

    /// Array memory size of the wrapped dictionary.
    fn get_array_memory_size(&self) -> usize {
        self.dictionary.get_array_memory_size()
    }

    /// Forwards the claim to the wrapped dictionary.
    #[cfg(feature = "pool")]
    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
        self.dictionary.claim(pool);
    }
}
926
// Iterating a typed dictionary yields `Option<Item>`, one entry per key,
// through the generic `ArrayIter`.
impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
where
    K: ArrowDictionaryKeyType,
    Self: ArrayAccessor,
{
    type Item = Option<<Self as ArrayAccessor>::Item>;
    type IntoIter = ArrayIter<Self>;

    /// Wraps this view in an [`ArrayIter`].
    fn into_iter(self) -> Self::IntoIter {
        ArrayIter::new(self)
    }
}
939
940impl<'a, K, V> ArrayAccessor for TypedDictionaryArray<'a, K, V>
941where
942 K: ArrowDictionaryKeyType,
943 V: Sync + Send,
944 &'a V: ArrayAccessor,
945 <&'a V as ArrayAccessor>::Item: Default,
946{
947 type Item = <&'a V as ArrayAccessor>::Item;
948
949 fn value(&self, index: usize) -> Self::Item {
950 assert!(
951 index < self.len(),
952 "Trying to access an element at index {} from a TypedDictionaryArray of length {}",
953 index,
954 self.len()
955 );
956 unsafe { self.value_unchecked(index) }
957 }
958
959 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
960 let val = unsafe { self.dictionary.keys.value_unchecked(index) };
961 let value_idx = val.as_usize();
962
963 match value_idx < self.values.len() {
966 true => unsafe { self.values.value_unchecked(value_idx) },
967 false => Default::default(),
968 }
969 }
970}
971
/// A key-type-erased interface to dictionary arrays.
///
/// Implemented in this file for every [`DictionaryArray`].
pub trait AnyDictionaryArray: Array {
    /// Returns the keys as a type-erased [`Array`].
    fn keys(&self) -> &dyn Array;

    /// Returns the dictionary values.
    fn values(&self) -> &ArrayRef;

    /// Returns the keys converted to `usize`.
    ///
    /// The [`DictionaryArray`] implementation clamps each key to
    /// `values().len() - 1` and panics when the values are empty.
    fn normalized_keys(&self) -> Vec<usize>;

    /// Returns a new dictionary array, as an [`ArrayRef`], with the same keys
    /// and the provided `values`.
    fn with_values(&self, values: ArrayRef) -> ArrayRef;
}
1038
1039impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
1040 fn keys(&self) -> &dyn Array {
1041 &self.keys
1042 }
1043
1044 fn values(&self) -> &ArrayRef {
1045 self.values()
1046 }
1047
1048 fn normalized_keys(&self) -> Vec<usize> {
1049 let v_len = self.values().len();
1050 assert_ne!(v_len, 0);
1051 let iter = self.keys().values().iter();
1052 iter.map(|x| x.as_usize().min(v_len - 1)).collect()
1053 }
1054
1055 fn with_values(&self, values: ArrayRef) -> ArrayRef {
1056 Arc::new(self.with_values(values))
1057 }
1058}
1059
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::as_dictionary_array;
    use crate::{Int8Array, Int16Array, Int32Array, RunArray, UInt8Array};
    use arrow_buffer::{Buffer, ToByteSlice};

    /// Builds dictionary arrays from raw `ArrayData`, without and with an offset.
    #[test]
    fn test_dictionary_array() {
        // Dictionary values: eight `i8` entries.
        let value_data = ArrayData::builder(DataType::Int8)
            .len(8)
            .add_buffer(Buffer::from(
                [10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Key buffer: three `i16` indices into the values.
        let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

        // Assemble the dictionary data with no offset.
        let key_type = DataType::Int16;
        let value_type = DataType::Int8;
        let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
        let dict_data = ArrayData::builder(dict_data_type.clone())
            .len(3)
            .add_buffer(keys.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(3, dict_array.len());

        // Neither the keys nor the values contain nulls.
        assert_eq!(0, dict_array.null_count());
        assert_eq!(0, dict_array.values().null_count());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));

        // Same buffers again, this time skipping the first key via an offset.
        let dict_data = ArrayData::builder(dict_data_type)
            .len(2)
            .offset(1)
            .add_buffer(keys)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(2, dict_array.len());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
    }

    /// Repeated appends to a primitive dictionary builder reuse existing keys.
    #[test]
    fn test_dictionary_builder_append_many() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();

        builder.append(1).unwrap();
        builder.append_n(2, 2).unwrap();
        builder.append_options(None, 2);
        builder.append_options(Some(3), 3);

        let array = builder.finish();

        // Each distinct value is stored once.
        let values = array
            .values()
            .as_primitive::<UInt32Type>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &[1, 2, 3]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Repeated appends to a string dictionary builder reuse existing keys.
    #[test]
    fn test_string_dictionary_builder_append_many() {
        let mut builder = StringDictionaryBuilder::<Int8Type>::new();

        builder.append("a").unwrap();
        builder.append_n("b", 2).unwrap();
        builder.append_options(None::<&str>, 2);
        builder.append_options(Some("c"), 3);

        let array = builder.finish();

        // Each distinct value is stored once.
        let values = array
            .values()
            .as_string::<i32>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &["a", "b", "c"]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Checks the `Debug` output of primitive dictionaries.
    #[test]
    fn test_dictionary_array_fmt_debug() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
        builder.append(12345678).unwrap();
        builder.append_null();
        builder.append(22345678).unwrap();
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        // Twenty copies of the same value: twenty keys, one dictionary entry.
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
        for _ in 0..20 {
            builder.append(1).unwrap();
        }
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }

    /// Collects iterators of `Option<&str>` and `&str` into dictionaries.
    #[test]
    fn test_dictionary_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        // Replace "b" with a null before collecting.
        let array: DictionaryArray<Int8Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// Reverse lookup returns the key assigned in first-seen order.
    #[test]
    fn test_dictionary_array_reverse_lookup_key() {
        let test = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("c"), Some(2));

        // Keys follow the order in which values first appear: t3, t2, t1, ...
        let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("t1"), Some(2));
        assert_eq!(array.lookup_key("non-existent"), None);
    }

    /// The keys are exposed as a primitive array of the key type.
    #[test]
    fn test_dictionary_keys_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int8, keys.data_type());
        assert_eq!(0, keys.null_count());
        assert_eq!(&[0, 1, 2, 0], keys.values());
    }

    /// Null inputs become null keys; valid keys still index the values.
    #[test]
    fn test_dictionary_keys_as_primitive_array_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int32, keys.data_type());
        assert_eq!(3, keys.null_count());

        assert!(keys.is_valid(0));
        assert!(!keys.is_valid(1));
        assert!(keys.is_valid(2));
        assert!(!keys.is_valid(3));
        assert!(!keys.is_valid(4));
        assert!(keys.is_valid(5));

        assert_eq!(0, keys.value(0));
        assert_eq!(1, keys.value(2));
        assert_eq!(0, keys.value(5));
    }

    /// A dictionary built only from nulls still yields valid array data.
    #[test]
    fn test_dictionary_all_nulls() {
        let test = vec![None, None, None];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// `keys_iter` yields the keys, which can drive `take_iter` on the values.
    #[test]
    fn test_dictionary_iter() {
        // Dictionary values and the keys that index into them.
        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
        let keys = Int16Array::from_iter_values([2_i16, 3, 4]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let mut key_iter = dict_array.keys_iter();
        assert_eq!(2, key_iter.next().unwrap().unwrap());
        assert_eq!(3, key_iter.next().unwrap().unwrap());
        assert_eq!(4, key_iter.next().unwrap().unwrap());
        assert!(key_iter.next().is_none());

        // Resolve the keys against the values.
        let mut iter = dict_array
            .values()
            .as_any()
            .downcast_ref::<Int8Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter());

        assert_eq!(12, iter.next().unwrap().unwrap());
        assert_eq!(13, iter.next().unwrap().unwrap());
        assert_eq!(14, iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// Null keys resolve to `None` when taking from the values.
    #[test]
    fn test_dictionary_iter_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let mut iter = array
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .take_iter(array.keys_iter());

        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("b", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// `key(i)` returns the key as `usize`, or `None` for a null key.
    #[test]
    fn test_dictionary_key() {
        let keys = Int8Array::from(vec![Some(2), None, Some(1)]);
        let values = StringArray::from(vec!["foo", "bar", "baz", "blarg"]);

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.key(0), Some(2));
        assert_eq!(array.key(1), None);
        assert_eq!(array.key(2), Some(1));
    }

    /// A valid keys/values pair constructs successfully and keeps its contents.
    #[test]
    fn test_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let keys: Int32Array = [Some(0), Some(2), None, Some(1)].into_iter().collect();

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.keys().data_type(), &DataType::Int32);
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);

        assert!(array.keys().is_valid(0));
        assert!(array.keys().is_valid(1));
        assert!(array.keys().is_null(2));
        assert!(array.keys().is_valid(3));

        assert_eq!(array.keys().value(0), 0);
        assert_eq!(array.keys().value(1), 2);
        assert_eq!(array.keys().value(3), 1);

        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int32>\n[\n 0,\n 2,\n null,\n 1,\n] values: StringArray\n[\n \"foo\",\n \"bar\",\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// A key past the end of the values is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
    fn test_try_new_index_too_large() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        // Key 3 is out of range for a two-entry dictionary.
        let keys: Int32Array = [Some(0), Some(3)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// A negative key is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
    fn test_try_new_index_too_small() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(-100)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// Converting `ArrayData` with a mismatched key type panics.
    #[test]
    #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
    fn test_from_array_data_validation() {
        let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
        let _ = DictionaryArray::<Int64Type>::from(a.into_data());
    }

    /// A dictionary can be converted into a builder and its values mutated.
    #[test]
    fn test_into_primitive_dict_builder() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);
        let col: DictionaryArray<Int8Type> = as_dictionary_array(&boxed).clone();

        // Drop the other handle before converting `col` into a builder.
        drop(boxed);

        let mut builder = col.into_primitive_dict_builder::<Int32Type>().unwrap();

        let slice = builder.values_slice_mut();
        assert_eq!(slice, &[10, 12, 15]);

        slice[0] = 4;
        slice[1] = 2;
        slice[2] = 1;

        let values = Int32Array::from_iter_values([4_i32, 2, 1]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));

        let new_array = builder.finish();
        assert_eq!(expected, new_array);
    }

    /// While another handle is still alive the conversion fails and returns
    /// an equivalent array.
    #[test]
    fn test_into_primitive_dict_builder_cloned_array() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);

        // `boxed` stays alive, so `col` is not the only handle to the data.
        let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
        let err = col.into_primitive_dict_builder::<Int32Type>();

        let returned = err.unwrap_err();

        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(expected, returned);
    }

    /// `occupancy` marks exactly the values referenced by non-null keys.
    #[test]
    fn test_occupancy() {
        // No nulls: keys 100..200 are all referenced.
        let keys = Int32Array::new((100..200).collect(), None);
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = (100..200).contains(&idx);
            assert_eq!(v, expected, "{idx}");
        }

        // With nulls: only every fourth key in 0..100 is valid.
        let keys = Int32Array::new(
            (0..100).collect(),
            Some((0..100).map(|x| x % 4 == 0).collect()),
        );
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = idx % 4 == 0 && idx < 100;
            assert_eq!(v, expected, "{idx}");
        }
    }

    /// Iterating a typed dictionary tolerates an out-of-range key in a null
    /// slot and reports null values as `None`.
    #[test]
    fn test_iterator_nulls() {
        // The second key (700) is out of range but masked as null.
        let keys = Int32Array::new(
            vec![0, 700, 1, 2].into(),
            Some(NullBuffer::from(vec![true, false, true, true])),
        );
        let values = Int32Array::from(vec![Some(50), None, Some(2)]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        let values: Vec<_> = dict
            .downcast_dict::<Int32Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(values, &[Some(50), None, None, Some(2)])
    }

    /// Logical nulls combine null keys with logically-null values.
    #[test]
    fn test_logical_nulls() -> Result<(), ArrowError> {
        let values = Arc::new(RunArray::try_new(
            &Int32Array::from(vec![1, 3, 7]),
            &Int32Array::from(vec![Some(1), None, Some(3)]),
        )?) as ArrayRef;

        // The run array reports no physical nulls but two logical nulls.
        assert_eq!(values.null_count(), 0);
        assert_eq!(values.logical_null_count(), 2);

        // Keys 0..len with no null mask: one key per value.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
            Arc::clone(&values),
        )?;

        // No null keys, so the logical nulls mirror those of the values.
        assert_eq!(dictionary.null_count(), 0);
        assert_eq!(dictionary.logical_null_count(), values.logical_null_count());
        assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
        assert!(dictionary.is_nullable());

        // Same keys, but the first one is null.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from(
                (0..values.len())
                    .map(|i| i as i8)
                    .map(|i| if i == 0 { None } else { Some(i) })
                    .collect::<Vec<_>>(),
            ),
            Arc::clone(&values),
        )?;

        assert_eq!(dictionary.null_count(), 1);

        // The null key adds one logical null on top of those from the values.
        assert_eq!(
            dictionary.logical_null_count(),
            values.logical_null_count() + 1
        );
        assert!(dictionary.is_nullable());

        Ok(())
    }

    /// `normalized_keys` clamps every key, including the out-of-range key in
    /// a null slot, to the last valid index.
    #[test]
    fn test_normalized_keys() {
        let values = vec![132, 0, 1].into();
        let nulls = NullBuffer::from(vec![false, true, true]);
        let keys = Int32Array::new(values, Some(nulls));
        let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2)));
        assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1])
    }

    /// An all-null key array is accepted by `try_new`.
    #[test]
    fn test_all_null_dict() {
        let all_null_dict_arr = DictionaryArray::try_new(
            UInt8Array::new_null(10),
            Arc::new(StringArray::from_iter_values(["a"])),
        );
        assert!(all_null_dict_arr.is_ok())
    }
}