1use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
19use crate::cast::AsArray;
20use crate::iterator::ArrayIter;
21use crate::types::*;
22use crate::{
23 Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar, StringArray,
24 make_array,
25};
26use arrow_buffer::bit_util::set_bit;
27use arrow_buffer::buffer::NullBuffer;
28use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder, ScalarBuffer};
29use arrow_data::ArrayData;
30use arrow_schema::{ArrowError, DataType};
31use std::any::Any;
32use std::sync::Arc;
33
/// A [`DictionaryArray`] whose keys are of type [`Int8Type`].
pub type Int8DictionaryArray = DictionaryArray<Int8Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int16Type`].
pub type Int16DictionaryArray = DictionaryArray<Int16Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int32Type`].
pub type Int32DictionaryArray = DictionaryArray<Int32Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int64Type`].
pub type Int64DictionaryArray = DictionaryArray<Int64Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt8Type`].
pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt16Type`].
pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt32Type`].
pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt64Type`].
pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
161
/// An array stored as integer `keys` that index into a separate `values` array.
///
/// A non-null key `k` refers to element `k` of `values`; `try_new` rejects any
/// non-null key outside `0..values.len()`.
pub struct DictionaryArray<K: ArrowDictionaryKeyType> {
    /// The `DataType::Dictionary(key type, value type)` describing this array.
    data_type: DataType,

    /// One key per logical element; the null mask of the keys is what
    /// `Array::nulls` reports for the whole array.
    keys: PrimitiveArray<K>,

    /// The dictionary entries that the keys point into.
    values: ArrayRef,

    /// Flag returned by `is_ordered`; every constructor in this file
    /// initialises it to `false`.
    is_ordered: bool,
}
258
259impl<K: ArrowDictionaryKeyType> Clone for DictionaryArray<K> {
260 fn clone(&self) -> Self {
261 Self {
262 data_type: self.data_type.clone(),
263 keys: self.keys.clone(),
264 values: self.values.clone(),
265 is_ordered: self.is_ordered,
266 }
267 }
268}
269
270impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
    /// Builds a dictionary array from `keys` and `values`.
    ///
    /// # Panics
    ///
    /// Panics whenever [`Self::try_new`] returns an error.
    pub fn new(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
        Self::try_new(keys, values).unwrap()
    }
281
282 pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
290 let data_type = DataType::Dictionary(
291 Box::new(keys.data_type().clone()),
292 Box::new(values.data_type().clone()),
293 );
294
295 let all_null = keys.null_count() == keys.len();
297
298 if !all_null {
299 let zero = K::Native::usize_as(0);
300 let values_len = values.len();
301
302 if let Some((idx, v)) = keys.values().iter().enumerate().find(|(idx, v)| {
303 (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
304 }) {
305 return Err(ArrowError::InvalidArgumentError(format!(
306 "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
307 )));
308 }
309 }
310
311 Ok(Self {
312 data_type,
313 keys,
314 values,
315 is_ordered: false,
316 })
317 }
318
319 pub fn new_scalar<T: Array + 'static>(value: Scalar<T>) -> Scalar<Self> {
321 Scalar::new(Self::new(
322 PrimitiveArray::new(vec![K::Native::usize_as(0)].into(), None),
323 Arc::new(value.into_inner()),
324 ))
325 }
326
327 pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
333 if cfg!(feature = "force_validate") {
334 return Self::new(keys, values);
335 }
336
337 let data_type = DataType::Dictionary(
338 Box::new(keys.data_type().clone()),
339 Box::new(values.data_type().clone()),
340 );
341
342 Self {
343 data_type,
344 keys,
345 values,
346 is_ordered: false,
347 }
348 }
349
    /// Consumes the array and returns its `(keys, values)` pair.
    pub fn into_parts(self) -> (PrimitiveArray<K>, ArrayRef) {
        (self.keys, self.values)
    }
354
    /// Returns a reference to the key array.
    pub fn keys(&self) -> &PrimitiveArray<K> {
        &self.keys
    }
359
360 pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
366 let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
367
368 (0..rd_buf.len())
369 .position(|i| rd_buf.value(i) == value)
370 .and_then(K::Native::from_usize)
371 }
372
    /// Returns a reference to the dictionary values.
    pub fn values(&self) -> &ArrayRef {
        &self.values
    }
377
    /// Returns a clone of the values' [`DataType`].
    pub fn value_type(&self) -> DataType {
        self.values.data_type().clone()
    }
382
    /// Number of elements in the array, i.e. the number of keys
    /// (not the number of dictionary values).
    pub fn len(&self) -> usize {
        self.keys.len()
    }
387
    /// Returns `true` when the array has no keys.
    pub fn is_empty(&self) -> bool {
        self.keys.is_empty()
    }
392
    /// Returns the stored `is_ordered` flag.
    // NOTE(review): every constructor visible in this file sets the flag to `false`.
    pub fn is_ordered(&self) -> bool {
        self.is_ordered
    }
397
398 pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
400 self.keys.iter().map(|key| key.map(|k| k.as_usize()))
401 }
402
403 pub fn key(&self, i: usize) -> Option<usize> {
406 self.keys.is_valid(i).then(|| self.keys.value(i).as_usize())
407 }
408
409 pub fn slice(&self, offset: usize, length: usize) -> Self {
411 Self {
412 data_type: self.data_type.clone(),
413 keys: self.keys.slice(offset, length),
414 values: self.values.clone(),
415 is_ordered: self.is_ordered,
416 }
417 }
418
419 pub fn downcast_dict<V: 'static>(&self) -> Option<TypedDictionaryArray<'_, K, V>> {
433 let values = self.values.as_any().downcast_ref()?;
434 Some(TypedDictionaryArray {
435 dictionary: self,
436 values,
437 })
438 }
439
440 pub fn with_values(&self, values: ArrayRef) -> Self {
478 assert!(values.len() >= self.values.len());
479 let data_type =
480 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
481 Self {
482 data_type,
483 keys: self.keys.clone(),
484 values,
485 is_ordered: false,
486 }
487 }
488
489 #[allow(clippy::result_large_err)]
492 pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
493 where
494 V: ArrowPrimitiveType,
495 {
496 if !self.value_type().is_primitive() {
497 return Err(self);
498 }
499
500 let key_array = self.keys().clone();
501 let value_array = self.values().as_primitive::<V>().clone();
502
503 drop(self.keys);
504 drop(self.values);
505
506 let key_builder = key_array.into_builder();
507 let value_builder = value_array.into_builder();
508
509 match (key_builder, value_builder) {
510 (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
511 PrimitiveDictionaryBuilder::new_from_builders(key_builder, value_builder)
512 }),
513 (Err(key_array), Ok(mut value_builder)) => {
514 Err(Self::try_new(key_array, Arc::new(value_builder.finish())).unwrap())
515 }
516 (Ok(mut key_builder), Err(value_array)) => {
517 Err(Self::try_new(key_builder.finish(), Arc::new(value_array)).unwrap())
518 }
519 (Err(key_array), Err(value_array)) => {
520 Err(Self::try_new(key_array, Arc::new(value_array)).unwrap())
521 }
522 }
523 }
524
525 #[allow(clippy::result_large_err)]
549 pub fn unary_mut<F, V>(self, op: F) -> Result<DictionaryArray<K>, DictionaryArray<K>>
550 where
551 V: ArrowPrimitiveType,
552 F: Fn(V::Native) -> V::Native,
553 {
554 let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
555 builder
556 .values_slice_mut()
557 .iter_mut()
558 .for_each(|v| *v = op(*v));
559 Ok(builder.finish())
560 }
561
562 pub fn occupancy(&self) -> BooleanBuffer {
567 let len = self.values.len();
568 let mut builder = BooleanBufferBuilder::new(len);
569 builder.resize(len);
570 let slice = builder.as_slice_mut();
571 match self.keys.nulls().filter(|n| n.null_count() > 0) {
572 Some(n) => {
573 let v = self.keys.values();
574 n.valid_indices()
575 .for_each(|idx| set_bit(slice, v[idx].as_usize()))
576 }
577 None => {
578 let v = self.keys.values();
579 v.iter().for_each(|v| set_bit(slice, v.as_usize()))
580 }
581 }
582 builder.finish()
583 }
584}
585
586impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
588 fn from(data: ArrayData) -> Self {
589 let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
590
591 assert_eq!(
592 buffers.len(),
593 1,
594 "DictionaryArray data should contain a single buffer only (keys)."
595 );
596 let buffer = buffers.pop().expect("checked above");
597 assert_eq!(
598 child_data.len(),
599 1,
600 "DictionaryArray should contain a single child array (values)."
601 );
602 let cd = child_data.pop().expect("checked above");
603
604 if let DataType::Dictionary(key_data_type, _) = &data_type {
605 assert_eq!(
606 &T::DATA_TYPE,
607 key_data_type.as_ref(),
608 "DictionaryArray's data type must match, expected {} got {}",
609 T::DATA_TYPE,
610 key_data_type
611 );
612
613 let values = make_array(cd);
614
615 let keys = PrimitiveArray::<T>::new(ScalarBuffer::new(buffer, offset, len), nulls);
617
618 Self {
619 data_type,
620 keys,
621 values,
622 is_ordered: false,
623 }
624 } else {
625 panic!("DictionaryArray must have Dictionary data type.")
626 }
627 }
628}
629
630impl<T: ArrowDictionaryKeyType> From<DictionaryArray<T>> for ArrayData {
631 fn from(array: DictionaryArray<T>) -> Self {
632 let builder = array
633 .keys
634 .into_data()
635 .into_builder()
636 .data_type(array.data_type)
637 .child_data(vec![array.values.to_data()]);
638
639 unsafe { builder.build_unchecked() }
640 }
641}
642
643impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for DictionaryArray<T> {
660 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
661 let it = iter.into_iter();
662 let (lower, _) = it.size_hint();
663 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
664 builder.extend(it);
665 builder.finish()
666 }
667}
668
669impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T> {
684 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
685 let it = iter.into_iter();
686 let (lower, _) = it.size_hint();
687 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
688 it.for_each(|i| {
689 builder
690 .append(i)
691 .expect("Unable to append a value to a dictionary array.");
692 });
693
694 builder.finish()
695 }
696}
697
698unsafe impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
700 fn as_any(&self) -> &dyn Any {
701 self
702 }
703
704 fn to_data(&self) -> ArrayData {
705 self.clone().into()
706 }
707
708 fn into_data(self) -> ArrayData {
709 self.into()
710 }
711
712 fn data_type(&self) -> &DataType {
713 &self.data_type
714 }
715
716 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
717 Arc::new(self.slice(offset, length))
718 }
719
720 fn len(&self) -> usize {
721 self.keys.len()
722 }
723
724 fn is_empty(&self) -> bool {
725 self.keys.is_empty()
726 }
727
728 fn shrink_to_fit(&mut self) {
729 self.keys.shrink_to_fit();
730 self.values.shrink_to_fit();
731 }
732
733 fn offset(&self) -> usize {
734 self.keys.offset()
735 }
736
737 fn nulls(&self) -> Option<&NullBuffer> {
738 self.keys.nulls()
739 }
740
741 fn logical_nulls(&self) -> Option<NullBuffer> {
742 match self.values.logical_nulls() {
743 None => self.nulls().cloned(),
744 Some(value_nulls) => {
745 let mut builder = BooleanBufferBuilder::new(self.len());
746 match self.keys.nulls() {
747 Some(n) => builder.append_buffer(n.inner()),
748 None => builder.append_n(self.len(), true),
749 }
750 for (idx, k) in self.keys.values().iter().enumerate() {
751 let k = k.as_usize();
752 if k < value_nulls.len() && value_nulls.is_null(k) {
754 builder.set_bit(idx, false);
755 }
756 }
757 Some(builder.finish().into())
758 }
759 }
760 }
761
762 fn logical_null_count(&self) -> usize {
763 match (self.keys.nulls(), self.values.logical_nulls()) {
764 (None, None) => 0,
765 (Some(key_nulls), None) => key_nulls.null_count(),
766 (None, Some(value_nulls)) => self
767 .keys
768 .values()
769 .iter()
770 .filter(|k| value_nulls.is_null(k.as_usize()))
771 .count(),
772 (Some(key_nulls), Some(value_nulls)) => self
773 .keys
774 .values()
775 .iter()
776 .enumerate()
777 .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize()))
778 .count(),
779 }
780 }
781
782 fn is_nullable(&self) -> bool {
783 !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable())
784 }
785
786 fn get_buffer_memory_size(&self) -> usize {
787 self.keys.get_buffer_memory_size() + self.values.get_buffer_memory_size()
788 }
789
790 fn get_array_memory_size(&self) -> usize {
791 std::mem::size_of::<Self>()
792 + self.keys.get_buffer_memory_size()
793 + self.values.get_array_memory_size()
794 }
795}
796
/// Debug output: the `Debug` form of the keys followed by that of the values,
/// terminated by a newline (`writeln!`).
impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(
            f,
            "DictionaryArray {{keys: {:?} values: {:?}}}",
            self.keys, self.values
        )
    }
}
806
/// A borrowed view of a [`DictionaryArray`] whose values have been downcast
/// to the concrete type `V`; created by `DictionaryArray::downcast_dict`.
pub struct TypedDictionaryArray<'a, K: ArrowDictionaryKeyType, V> {
    /// The dictionary array being viewed.
    dictionary: &'a DictionaryArray<K>,
    /// The dictionary's values, already downcast to `V`.
    values: &'a V,
}
830
// Implemented by hand (via `Copy`) so that no `V: Clone` bound is required;
// the struct only holds references.
impl<K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'_, K, V> {
    fn clone(&self) -> Self {
        *self
    }
}
837
// The view is just two shared references, so it is trivially copyable.
impl<K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'_, K, V> {}
839
/// Debug output wraps the `Debug` form of the underlying dictionary array.
impl<K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'_, K, V> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
    }
}
845
impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
    /// Returns the keys of the underlying dictionary array.
    pub fn keys(&self) -> &'a PrimitiveArray<K> {
        self.dictionary.keys()
    }

    /// Returns the dictionary values as the concrete type `V`.
    pub fn values(&self) -> &'a V {
        self.values
    }
}
857
/// [`Array`] implementation that forwards every method to the wrapped
/// [`DictionaryArray`].
unsafe impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
    /// Exposes the underlying `DictionaryArray`, not the typed view itself.
    fn as_any(&self) -> &dyn Any {
        self.dictionary
    }

    fn to_data(&self) -> ArrayData {
        self.dictionary.to_data()
    }

    fn into_data(self) -> ArrayData {
        self.dictionary.into_data()
    }

    fn data_type(&self) -> &DataType {
        self.dictionary.data_type()
    }

    /// The result is a plain sliced `DictionaryArray`; the typed view is not kept.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.dictionary.slice(offset, length))
    }

    fn len(&self) -> usize {
        self.dictionary.len()
    }

    fn is_empty(&self) -> bool {
        self.dictionary.is_empty()
    }

    fn offset(&self) -> usize {
        self.dictionary.offset()
    }

    fn nulls(&self) -> Option<&NullBuffer> {
        self.dictionary.nulls()
    }

    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.dictionary.logical_nulls()
    }

    fn logical_null_count(&self) -> usize {
        self.dictionary.logical_null_count()
    }

    fn is_nullable(&self) -> bool {
        self.dictionary.is_nullable()
    }

    fn get_buffer_memory_size(&self) -> usize {
        self.dictionary.get_buffer_memory_size()
    }

    fn get_array_memory_size(&self) -> usize {
        self.dictionary.get_array_memory_size()
    }
}
915
/// Iterates over the typed view through [`ArrayIter`], yielding
/// `Option<Item>` per slot.
impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
where
    K: ArrowDictionaryKeyType,
    Self: ArrayAccessor,
{
    type Item = Option<<Self as ArrayAccessor>::Item>;
    type IntoIter = ArrayIter<Self>;

    fn into_iter(self) -> Self::IntoIter {
        ArrayIter::new(self)
    }
}
928
929impl<'a, K, V> ArrayAccessor for TypedDictionaryArray<'a, K, V>
930where
931 K: ArrowDictionaryKeyType,
932 V: Sync + Send,
933 &'a V: ArrayAccessor,
934 <&'a V as ArrayAccessor>::Item: Default,
935{
936 type Item = <&'a V as ArrayAccessor>::Item;
937
938 fn value(&self, index: usize) -> Self::Item {
939 assert!(
940 index < self.len(),
941 "Trying to access an element at index {} from a TypedDictionaryArray of length {}",
942 index,
943 self.len()
944 );
945 unsafe { self.value_unchecked(index) }
946 }
947
948 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
949 let val = unsafe { self.dictionary.keys.value_unchecked(index) };
950 let value_idx = val.as_usize();
951
952 match value_idx < self.values.len() {
955 true => unsafe { self.values.value_unchecked(value_idx) },
956 false => Default::default(),
957 }
958 }
959}
960
/// A [`DictionaryArray`] with the key type erased, for code that needs to
/// work with dictionaries without knowing `K`.
pub trait AnyDictionaryArray: Array {
    /// Returns the keys as a type-erased array.
    fn keys(&self) -> &dyn Array;

    /// Returns the dictionary values.
    fn values(&self) -> &ArrayRef;

    /// Returns the keys as `usize` indices that are all valid positions in
    /// `values`. In the implementation in this file, out-of-range keys are
    /// clamped to the last position and an empty `values` panics.
    fn normalized_keys(&self) -> Vec<usize>;

    /// Returns a new dictionary array with the same keys and the given `values`.
    fn with_values(&self, values: ArrayRef) -> ArrayRef;
}
1027
1028impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
1029 fn keys(&self) -> &dyn Array {
1030 &self.keys
1031 }
1032
1033 fn values(&self) -> &ArrayRef {
1034 self.values()
1035 }
1036
1037 fn normalized_keys(&self) -> Vec<usize> {
1038 let v_len = self.values().len();
1039 assert_ne!(v_len, 0);
1040 let iter = self.keys().values().iter();
1041 iter.map(|x| x.as_usize().min(v_len - 1)).collect()
1042 }
1043
1044 fn with_values(&self, values: ArrayRef) -> ArrayRef {
1045 Arc::new(self.with_values(values))
1046 }
1047}
1048
1049#[cfg(test)]
1050mod tests {
1051 use super::*;
1052 use crate::cast::as_dictionary_array;
1053 use crate::{Int8Array, Int16Array, Int32Array, RunArray, UInt8Array};
1054 use arrow_buffer::{Buffer, ToByteSlice};
1055
    /// Builds dictionary arrays from hand-assembled `ArrayData`, with and
    /// without an offset, and checks keys, values and null counts.
    #[test]
    fn test_dictionary_array() {
        // Dictionary values: eight `Int8` entries, 10 through 17.
        let value_data = ArrayData::builder(DataType::Int8)
            .len(8)
            .add_buffer(Buffer::from(
                [10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Key buffer: three `Int16` keys.
        let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

        // Assemble the dictionary `ArrayData` by hand.
        let key_type = DataType::Int16;
        let value_type = DataType::Int8;
        let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
        let dict_data = ArrayData::builder(dict_data_type.clone())
            .len(3)
            .add_buffer(keys.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(3, dict_array.len());

        // Neither the keys nor the values contain nulls.
        assert_eq!(0, dict_array.null_count());
        assert_eq!(0, dict_array.values().null_count());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));

        // Same buffers, now viewed with offset 1 and length 2.
        let dict_data = ArrayData::builder(dict_data_type)
            .len(2)
            .offset(1)
            .add_buffer(keys)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(2, dict_array.len());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
    }
1108
    /// `append_n` / `append_options` on a primitive dictionary builder repeat
    /// the same key and store each distinct value once.
    #[test]
    fn test_dictionary_builder_append_many() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();

        builder.append(1).unwrap();
        builder.append_n(2, 2).unwrap();
        builder.append_options(None, 2);
        builder.append_options(Some(3), 3);

        let array = builder.finish();

        // Each distinct value appears exactly once in the dictionary.
        let values = array
            .values()
            .as_primitive::<UInt32Type>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &[1, 2, 3]);
        // One key per appended element, with `None` for the appended nulls.
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }
1142
    /// Same scenario as the primitive builder test, using a string
    /// dictionary builder.
    #[test]
    fn test_string_dictionary_builder_append_many() {
        let mut builder = StringDictionaryBuilder::<Int8Type>::new();

        builder.append("a").unwrap();
        builder.append_n("b", 2).unwrap();
        builder.append_options(None::<&str>, 2);
        builder.append_options(Some("c"), 3);

        let array = builder.finish();

        // Each distinct string appears exactly once in the dictionary.
        let values = array
            .values()
            .as_string::<i32>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &["a", "b", "c"]);
        // One key per appended element, with `None` for the appended nulls.
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }
1176
    /// Pins the exact `Debug` output of a dictionary array, for a short array
    /// with a null and for a 20-element array of one repeated value.
    #[test]
    fn test_dictionary_array_fmt_debug() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
        builder.append(12345678).unwrap();
        builder.append_null();
        builder.append(22345678).unwrap();
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        // Twenty copies of the same value: twenty keys, one dictionary entry.
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
        for _ in 0..20 {
            builder.append(1).unwrap();
        }
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }
1199
    /// Collecting from iterators of `Option<&str>` and of `&str` produces the
    /// expected keys and dictionary values.
    #[test]
    fn test_dictionary_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        // Replace "b" by a null: it becomes a null key and never enters the dictionary.
        let array: DictionaryArray<Int8Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        // Without nulls, all three distinct strings are stored.
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }
1218
    /// `lookup_key` returns the dictionary position of a string, or `None`
    /// when the string is absent.
    #[test]
    fn test_dictionary_array_reverse_lookup_key() {
        let test = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("c"), Some(2));

        // Keys follow first-seen order: "t3" -> 0, "t2" -> 1, "t1" -> 2, ...
        let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("t1"), Some(2));
        assert_eq!(array.lookup_key("non-existent"), None);
    }
1233
    /// The keys of a collected dictionary are exposed as a primitive array of
    /// the key type, without nulls.
    #[test]
    fn test_dictionary_keys_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int8, keys.data_type());
        assert_eq!(0, keys.null_count());
        // The repeated "a" reuses key 0.
        assert_eq!(&[0, 1, 2, 0], keys.values());
    }
1244
1245 #[test]
1246 fn test_dictionary_keys_as_primitive_array_with_null() {
1247 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
1248 let array: DictionaryArray<Int32Type> = test.into_iter().collect();
1249
1250 let keys = array.keys();
1251 assert_eq!(&DataType::Int32, keys.data_type());
1252 assert_eq!(3, keys.null_count());
1253
1254 assert!(keys.is_valid(0));
1255 assert!(!keys.is_valid(1));
1256 assert!(keys.is_valid(2));
1257 assert!(!keys.is_valid(3));
1258 assert!(!keys.is_valid(4));
1259 assert!(keys.is_valid(5));
1260
1261 assert_eq!(0, keys.value(0));
1262 assert_eq!(1, keys.value(2));
1263 assert_eq!(0, keys.value(5));
1264 }
1265
    /// A dictionary built only from nulls still yields `ArrayData` that
    /// passes full validation.
    #[test]
    fn test_dictionary_all_nulls() {
        let test = vec![None, None, None];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }
1275
    /// `keys_iter` yields the keys as `usize`, and can drive `take_iter` on
    /// the values to resolve each element.
    #[test]
    fn test_dictionary_iter() {
        // Dictionary values 10..=17 with keys 2, 3, 4.
        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
        let keys = Int16Array::from_iter_values([2_i16, 3, 4]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let mut key_iter = dict_array.keys_iter();
        assert_eq!(2, key_iter.next().unwrap().unwrap());
        assert_eq!(3, key_iter.next().unwrap().unwrap());
        assert_eq!(4, key_iter.next().unwrap().unwrap());
        assert!(key_iter.next().is_none());

        // Resolve the keys against the downcast values.
        let mut iter = dict_array
            .values()
            .as_any()
            .downcast_ref::<Int8Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter());

        assert_eq!(12, iter.next().unwrap().unwrap());
        assert_eq!(13, iter.next().unwrap().unwrap());
        assert_eq!(14, iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }
1303
    /// Resolving keys through `take_iter` yields `None` for null keys and the
    /// string for valid ones.
    #[test]
    fn test_dictionary_iter_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let mut iter = array
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .take_iter(array.keys_iter());

        // The output follows the input element by element.
        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("b", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }
1324
1325 #[test]
1326 fn test_dictionary_key() {
1327 let keys = Int8Array::from(vec![Some(2), None, Some(1)]);
1328 let values = StringArray::from(vec!["foo", "bar", "baz", "blarg"]);
1329
1330 let array = DictionaryArray::new(keys, Arc::new(values));
1331 assert_eq!(array.key(0), Some(2));
1332 assert_eq!(array.key(1), None);
1333 assert_eq!(array.key(2), Some(1));
1334 }
1335
    /// A dictionary built from valid keys (including a null) reports the
    /// right types, null counts, key values and `Debug` output.
    #[test]
    fn test_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let keys: Int32Array = [Some(0), Some(2), None, Some(1)].into_iter().collect();

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.keys().data_type(), &DataType::Int32);
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        // The single null key is the only physical and logical null.
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);

        assert!(array.keys().is_valid(0));
        assert!(array.keys().is_valid(1));
        assert!(array.keys().is_null(2));
        assert!(array.keys().is_valid(3));

        assert_eq!(array.keys().value(0), 0);
        assert_eq!(array.keys().value(1), 2);
        assert_eq!(array.keys().value(3), 1);

        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int32>\n[\n 0,\n 2,\n null,\n 1,\n] values: StringArray\n[\n \"foo\",\n \"bar\",\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }
1364
    /// A key equal to or above `values.len()` is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
    fn test_try_new_index_too_large() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        // Key 3 does not exist: the dictionary has only two values.
        let keys: Int32Array = [Some(0), Some(3)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }
1373
    /// A negative key is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
    fn test_try_new_index_too_small() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(-100)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }
1381
    /// Converting `ArrayData` with `Int32` keys into an `Int64`-keyed
    /// dictionary panics with a type-mismatch message.
    #[test]
    #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
    fn test_from_array_data_validation() {
        let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
        let _ = DictionaryArray::<Int64Type>::from(a.into_data());
    }
1388
    /// With every other handle dropped, the array converts into a builder
    /// whose values can be edited in place.
    #[test]
    fn test_into_primitive_dict_builder() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);
        let col: DictionaryArray<Int8Type> = as_dictionary_array(&boxed).clone();

        // Drop the other handle so `col` is the only one left before conversion.
        drop(boxed);

        let mut builder = col.into_primitive_dict_builder::<Int32Type>().unwrap();

        let slice = builder.values_slice_mut();
        assert_eq!(slice, &[10, 12, 15]);

        // Overwrite the dictionary values in place.
        slice[0] = 4;
        slice[1] = 2;
        slice[2] = 1;

        let values = Int32Array::from_iter_values([4_i32, 2, 1]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));

        let new_array = builder.finish();
        assert_eq!(expected, new_array);
    }
1418
    /// While another handle (`boxed`) is still alive, conversion into a
    /// builder fails and the original array is handed back unchanged.
    #[test]
    fn test_into_primitive_dict_builder_cloned_array() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);

        // `boxed` stays alive here, unlike in the test above.
        let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
        let err = col.into_primitive_dict_builder::<Int32Type>();

        let returned = err.unwrap_err();

        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(expected, returned);
    }
1439
    /// `occupancy` sets exactly the bits of dictionary entries referenced by
    /// non-null keys.
    #[test]
    fn test_occupancy() {
        // Keys 100..200 without nulls: bits 100..200 are set.
        let keys = Int32Array::new((100..200).collect(), None);
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = (100..200).contains(&idx);
            assert_eq!(v, expected, "{idx}");
        }

        // Keys 0..100 where only every fourth slot is valid: only those count.
        let keys = Int32Array::new(
            (0..100).collect(),
            Some((0..100).map(|x| x % 4 == 0).collect()),
        );
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = idx % 4 == 0 && idx < 100;
            assert_eq!(v, expected, "{idx}");
        }
    }
1461
    /// Iterating a typed dictionary yields `None` both for null keys (even
    /// when the stored key is out of range) and for null dictionary values.
    #[test]
    fn test_iterator_nulls() {
        // Slot 1 is null and holds the out-of-range key 700.
        let keys = Int32Array::new(
            vec![0, 700, 1, 2].into(),
            Some(NullBuffer::from(vec![true, false, true, true])),
        );
        // Dictionary entry 1 is itself null.
        let values = Int32Array::from(vec![Some(50), None, Some(2)]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        let values: Vec<_> = dict
            .downcast_dict::<Int32Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(values, &[Some(50), None, None, Some(2)])
    }
1477
    /// Logical nulls of a dictionary combine null keys with logically null
    /// values, here using a `RunArray` whose nulls are only logical.
    #[test]
    fn test_logical_nulls() -> Result<(), ArrowError> {
        // Run-end encoded values: run ends 1, 3, 7 with a null middle run.
        let values = Arc::new(RunArray::try_new(
            &Int32Array::from(vec![1, 3, 7]),
            &Int32Array::from(vec![Some(1), None, Some(3)]),
        )?) as ArrayRef;

        // The run array has no physical nulls but two logical ones.
        assert_eq!(values.null_count(), 0);
        assert_eq!(values.logical_null_count(), 2);

        // Identity keys (0, 1, 2, ...) with no null keys.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
            Arc::clone(&values),
        )?;

        // No physical nulls; logical nulls equal those of the values.
        assert_eq!(dictionary.null_count(), 0);
        assert_eq!(dictionary.logical_null_count(), values.logical_null_count());
        assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
        assert!(dictionary.is_nullable());

        // Same keys, except that the first one is null.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from(
                (0..values.len())
                    .map(|i| i as i8)
                    .map(|i| if i == 0 { None } else { Some(i) })
                    .collect::<Vec<_>>(),
            ),
            Arc::clone(&values),
        )?;

        assert_eq!(dictionary.null_count(), 1);

        // The null key adds one logical null on top of the values' own.
        assert_eq!(
            dictionary.logical_null_count(),
            values.logical_null_count() + 1
        );
        assert!(dictionary.is_nullable());

        Ok(())
    }
1525
    /// `normalized_keys` clamps an out-of-range key stored under a null slot
    /// to the last dictionary position.
    #[test]
    fn test_normalized_keys() {
        // Slot 0 is null and holds 132, which is out of range for two values.
        let values = vec![132, 0, 1].into();
        let nulls = NullBuffer::from(vec![false, true, true]);
        let keys = Int32Array::new(values, Some(nulls));
        let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2)));
        assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1])
    }
1534
1535 #[test]
1536 fn test_all_null_dict() {
1537 let all_null_dict_arr = DictionaryArray::try_new(
1538 UInt8Array::new_null(10),
1539 Arc::new(StringArray::from_iter_values(["a"])),
1540 );
1541 assert!(all_null_dict_arr.is_ok())
1542 }
1543}