1use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
19use crate::cast::AsArray;
20use crate::iterator::ArrayIter;
21use crate::types::*;
22use crate::{
23 make_array, Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar,
24 StringArray,
25};
26use arrow_buffer::bit_util::set_bit;
27use arrow_buffer::buffer::NullBuffer;
28use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
29use arrow_data::ArrayData;
30use arrow_schema::{ArrowError, DataType};
31use std::any::Any;
32use std::sync::Arc;
33
/// A [`DictionaryArray`] whose keys are of type [`Int8Type`].
pub type Int8DictionaryArray = DictionaryArray<Int8Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int16Type`].
pub type Int16DictionaryArray = DictionaryArray<Int16Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int32Type`].
pub type Int32DictionaryArray = DictionaryArray<Int32Type>;

/// A [`DictionaryArray`] whose keys are of type [`Int64Type`].
pub type Int64DictionaryArray = DictionaryArray<Int64Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt8Type`].
pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt16Type`].
pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt32Type`].
pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;

/// A [`DictionaryArray`] whose keys are of type [`UInt64Type`].
pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
161
/// An array of dictionary-encoded data: every slot stores a key of type `K`
/// that indexes into a shared `values` array.
pub struct DictionaryArray<K: ArrowDictionaryKeyType> {
    /// The `DataType::Dictionary(key type, value type)` describing this array.
    data_type: DataType,

    /// One key per slot. `try_new` only validates keys in non-null slots, so
    /// the key stored in a null slot may be any value.
    keys: PrimitiveArray<K>,

    /// The dictionary values that the keys index into.
    values: ArrayRef,

    /// Flag reported by `is_ordered`; every constructor in this file sets it
    /// to `false`.
    is_ordered: bool,
}
258
259impl<K: ArrowDictionaryKeyType> Clone for DictionaryArray<K> {
260 fn clone(&self) -> Self {
261 Self {
262 data_type: self.data_type.clone(),
263 keys: self.keys.clone(),
264 values: self.values.clone(),
265 is_ordered: self.is_ordered,
266 }
267 }
268}
269
270impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
271 pub fn new(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
279 Self::try_new(keys, values).unwrap()
280 }
281
282 pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
290 let data_type = DataType::Dictionary(
291 Box::new(keys.data_type().clone()),
292 Box::new(values.data_type().clone()),
293 );
294
295 let zero = K::Native::usize_as(0);
296 let values_len = values.len();
297
298 if let Some((idx, v)) =
299 keys.values().iter().enumerate().find(|(idx, v)| {
300 (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
301 })
302 {
303 return Err(ArrowError::InvalidArgumentError(format!(
304 "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
305 )));
306 }
307
308 Ok(Self {
309 data_type,
310 keys,
311 values,
312 is_ordered: false,
313 })
314 }
315
316 pub fn new_scalar<T: Array + 'static>(value: Scalar<T>) -> Scalar<Self> {
318 Scalar::new(Self::new(
319 PrimitiveArray::new(vec![K::Native::usize_as(0)].into(), None),
320 Arc::new(value.into_inner()),
321 ))
322 }
323
324 pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
330 if cfg!(feature = "force_validate") {
331 return Self::new(keys, values);
332 }
333
334 let data_type = DataType::Dictionary(
335 Box::new(keys.data_type().clone()),
336 Box::new(values.data_type().clone()),
337 );
338
339 Self {
340 data_type,
341 keys,
342 values,
343 is_ordered: false,
344 }
345 }
346
347 pub fn into_parts(self) -> (PrimitiveArray<K>, ArrayRef) {
349 (self.keys, self.values)
350 }
351
352 pub fn keys(&self) -> &PrimitiveArray<K> {
354 &self.keys
355 }
356
357 pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
363 let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
364
365 (0..rd_buf.len())
366 .position(|i| rd_buf.value(i) == value)
367 .and_then(K::Native::from_usize)
368 }
369
370 pub fn values(&self) -> &ArrayRef {
372 &self.values
373 }
374
375 pub fn value_type(&self) -> DataType {
377 self.values.data_type().clone()
378 }
379
380 pub fn len(&self) -> usize {
382 self.keys.len()
383 }
384
385 pub fn is_empty(&self) -> bool {
387 self.keys.is_empty()
388 }
389
390 pub fn is_ordered(&self) -> bool {
392 self.is_ordered
393 }
394
395 pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
397 self.keys.iter().map(|key| key.map(|k| k.as_usize()))
398 }
399
400 pub fn key(&self, i: usize) -> Option<usize> {
403 self.keys.is_valid(i).then(|| self.keys.value(i).as_usize())
404 }
405
406 pub fn slice(&self, offset: usize, length: usize) -> Self {
408 Self {
409 data_type: self.data_type.clone(),
410 keys: self.keys.slice(offset, length),
411 values: self.values.clone(),
412 is_ordered: self.is_ordered,
413 }
414 }
415
416 pub fn downcast_dict<V: 'static>(&self) -> Option<TypedDictionaryArray<'_, K, V>> {
430 let values = self.values.as_any().downcast_ref()?;
431 Some(TypedDictionaryArray {
432 dictionary: self,
433 values,
434 })
435 }
436
437 pub fn with_values(&self, values: ArrayRef) -> Self {
475 assert!(values.len() >= self.values.len());
476 let data_type =
477 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
478 Self {
479 data_type,
480 keys: self.keys.clone(),
481 values,
482 is_ordered: false,
483 }
484 }
485
486 pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
489 where
490 V: ArrowPrimitiveType,
491 {
492 if !self.value_type().is_primitive() {
493 return Err(self);
494 }
495
496 let key_array = self.keys().clone();
497 let value_array = self.values().as_primitive::<V>().clone();
498
499 drop(self.keys);
500 drop(self.values);
501
502 let key_builder = key_array.into_builder();
503 let value_builder = value_array.into_builder();
504
505 match (key_builder, value_builder) {
506 (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
507 PrimitiveDictionaryBuilder::new_from_builders(key_builder, value_builder)
508 }),
509 (Err(key_array), Ok(mut value_builder)) => {
510 Err(Self::try_new(key_array, Arc::new(value_builder.finish())).unwrap())
511 }
512 (Ok(mut key_builder), Err(value_array)) => {
513 Err(Self::try_new(key_builder.finish(), Arc::new(value_array)).unwrap())
514 }
515 (Err(key_array), Err(value_array)) => {
516 Err(Self::try_new(key_array, Arc::new(value_array)).unwrap())
517 }
518 }
519 }
520
521 pub fn unary_mut<F, V>(self, op: F) -> Result<DictionaryArray<K>, DictionaryArray<K>>
545 where
546 V: ArrowPrimitiveType,
547 F: Fn(V::Native) -> V::Native,
548 {
549 let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
550 builder
551 .values_slice_mut()
552 .iter_mut()
553 .for_each(|v| *v = op(*v));
554 Ok(builder.finish())
555 }
556
557 pub fn occupancy(&self) -> BooleanBuffer {
562 let len = self.values.len();
563 let mut builder = BooleanBufferBuilder::new(len);
564 builder.resize(len);
565 let slice = builder.as_slice_mut();
566 match self.keys.nulls().filter(|n| n.null_count() > 0) {
567 Some(n) => {
568 let v = self.keys.values();
569 n.valid_indices()
570 .for_each(|idx| set_bit(slice, v[idx].as_usize()))
571 }
572 None => {
573 let v = self.keys.values();
574 v.iter().for_each(|v| set_bit(slice, v.as_usize()))
575 }
576 }
577 builder.finish()
578 }
579}
580
581impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
583 fn from(data: ArrayData) -> Self {
584 assert_eq!(
585 data.buffers().len(),
586 1,
587 "DictionaryArray data should contain a single buffer only (keys)."
588 );
589 assert_eq!(
590 data.child_data().len(),
591 1,
592 "DictionaryArray should contain a single child array (values)."
593 );
594
595 if let DataType::Dictionary(key_data_type, _) = data.data_type() {
596 assert_eq!(
597 &T::DATA_TYPE,
598 key_data_type.as_ref(),
599 "DictionaryArray's data type must match, expected {} got {}",
600 T::DATA_TYPE,
601 key_data_type
602 );
603
604 let values = make_array(data.child_data()[0].clone());
605 let data_type = data.data_type().clone();
606
607 let keys = PrimitiveArray::<T>::from(unsafe {
612 data.into_builder()
613 .data_type(T::DATA_TYPE)
614 .child_data(vec![])
615 .build_unchecked()
616 });
617
618 Self {
619 data_type,
620 keys,
621 values,
622 is_ordered: false,
623 }
624 } else {
625 panic!("DictionaryArray must have Dictionary data type.")
626 }
627 }
628}
629
630impl<T: ArrowDictionaryKeyType> From<DictionaryArray<T>> for ArrayData {
631 fn from(array: DictionaryArray<T>) -> Self {
632 let builder = array
633 .keys
634 .into_data()
635 .into_builder()
636 .data_type(array.data_type)
637 .child_data(vec![array.values.to_data()]);
638
639 unsafe { builder.build_unchecked() }
640 }
641}
642
643impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for DictionaryArray<T> {
660 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
661 let it = iter.into_iter();
662 let (lower, _) = it.size_hint();
663 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
664 builder.extend(it);
665 builder.finish()
666 }
667}
668
669impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T> {
684 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
685 let it = iter.into_iter();
686 let (lower, _) = it.size_hint();
687 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
688 it.for_each(|i| {
689 builder
690 .append(i)
691 .expect("Unable to append a value to a dictionary array.");
692 });
693
694 builder.finish()
695 }
696}
697
698impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
699 fn as_any(&self) -> &dyn Any {
700 self
701 }
702
703 fn to_data(&self) -> ArrayData {
704 self.clone().into()
705 }
706
707 fn into_data(self) -> ArrayData {
708 self.into()
709 }
710
711 fn data_type(&self) -> &DataType {
712 &self.data_type
713 }
714
715 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
716 Arc::new(self.slice(offset, length))
717 }
718
719 fn len(&self) -> usize {
720 self.keys.len()
721 }
722
723 fn is_empty(&self) -> bool {
724 self.keys.is_empty()
725 }
726
727 fn shrink_to_fit(&mut self) {
728 self.keys.shrink_to_fit();
729 self.values.shrink_to_fit();
730 }
731
732 fn offset(&self) -> usize {
733 self.keys.offset()
734 }
735
736 fn nulls(&self) -> Option<&NullBuffer> {
737 self.keys.nulls()
738 }
739
740 fn logical_nulls(&self) -> Option<NullBuffer> {
741 match self.values.logical_nulls() {
742 None => self.nulls().cloned(),
743 Some(value_nulls) => {
744 let mut builder = BooleanBufferBuilder::new(self.len());
745 match self.keys.nulls() {
746 Some(n) => builder.append_buffer(n.inner()),
747 None => builder.append_n(self.len(), true),
748 }
749 for (idx, k) in self.keys.values().iter().enumerate() {
750 let k = k.as_usize();
751 if k < value_nulls.len() && value_nulls.is_null(k) {
753 builder.set_bit(idx, false);
754 }
755 }
756 Some(builder.finish().into())
757 }
758 }
759 }
760
761 fn logical_null_count(&self) -> usize {
762 match (self.keys.nulls(), self.values.logical_nulls()) {
763 (None, None) => 0,
764 (Some(key_nulls), None) => key_nulls.null_count(),
765 (None, Some(value_nulls)) => self
766 .keys
767 .values()
768 .iter()
769 .filter(|k| value_nulls.is_null(k.as_usize()))
770 .count(),
771 (Some(key_nulls), Some(value_nulls)) => self
772 .keys
773 .values()
774 .iter()
775 .enumerate()
776 .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize()))
777 .count(),
778 }
779 }
780
781 fn is_nullable(&self) -> bool {
782 !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable())
783 }
784
785 fn get_buffer_memory_size(&self) -> usize {
786 self.keys.get_buffer_memory_size() + self.values.get_buffer_memory_size()
787 }
788
789 fn get_array_memory_size(&self) -> usize {
790 std::mem::size_of::<Self>()
791 + self.keys.get_buffer_memory_size()
792 + self.values.get_array_memory_size()
793 }
794}
795
796impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
797 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
798 writeln!(
799 f,
800 "DictionaryArray {{keys: {:?} values: {:?}}}",
801 self.keys, self.values
802 )
803 }
804}
805
/// A borrowed view of a [`DictionaryArray`] whose values have been downcast
/// to the concrete type `V` (see `DictionaryArray::downcast_dict`).
pub struct TypedDictionaryArray<'a, K: ArrowDictionaryKeyType, V> {
    /// The dictionary array being viewed.
    dictionary: &'a DictionaryArray<K>,
    /// The dictionary's values, downcast to `V`.
    values: &'a V,
}
829
// Implemented by hand rather than derived so that no `V: Clone` bound is
// required: the struct holds only references.
impl<K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'_, K, V> {
    /// Copies the two references held by this view.
    fn clone(&self) -> Self {
        *self
    }
}
836
// The view holds only shared references, so it is `Copy` for any `V`.
impl<K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'_, K, V> {}
838
839impl<K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'_, K, V> {
840 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
841 writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
842 }
843}
844
impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
    /// Returns the keys of the underlying dictionary, borrowed for `'a`.
    pub fn keys(&self) -> &'a PrimitiveArray<K> {
        self.dictionary.keys()
    }

    /// Returns the downcast dictionary values, borrowed for `'a`.
    pub fn values(&self) -> &'a V {
        self.values
    }
}
856
// Every method delegates to the wrapped `DictionaryArray`; the typed values
// reference is not consulted here.
impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
    /// Returns the underlying `DictionaryArray` (not this view) as `Any`.
    fn as_any(&self) -> &dyn Any {
        self.dictionary
    }

    fn to_data(&self) -> ArrayData {
        self.dictionary.to_data()
    }

    fn into_data(self) -> ArrayData {
        self.dictionary.into_data()
    }

    fn data_type(&self) -> &DataType {
        self.dictionary.data_type()
    }

    /// Returns a slice of the underlying dictionary; the result is an untyped
    /// `DictionaryArray`, not another typed view.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.dictionary.slice(offset, length))
    }

    fn len(&self) -> usize {
        self.dictionary.len()
    }

    fn is_empty(&self) -> bool {
        self.dictionary.is_empty()
    }

    fn offset(&self) -> usize {
        self.dictionary.offset()
    }

    fn nulls(&self) -> Option<&NullBuffer> {
        self.dictionary.nulls()
    }

    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.dictionary.logical_nulls()
    }

    fn logical_null_count(&self) -> usize {
        self.dictionary.logical_null_count()
    }

    fn is_nullable(&self) -> bool {
        self.dictionary.is_nullable()
    }

    fn get_buffer_memory_size(&self) -> usize {
        self.dictionary.get_buffer_memory_size()
    }

    fn get_array_memory_size(&self) -> usize {
        self.dictionary.get_array_memory_size()
    }
}
914
// Iteration is available whenever the typed view implements `ArrayAccessor`.
impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
where
    K: ArrowDictionaryKeyType,
    Self: ArrayAccessor,
{
    type Item = Option<<Self as ArrayAccessor>::Item>;
    type IntoIter = ArrayIter<Self>;

    /// Returns an [`ArrayIter`] over this view, yielding `Option` items.
    fn into_iter(self) -> Self::IntoIter {
        ArrayIter::new(self)
    }
}
927
928impl<'a, K, V> ArrayAccessor for TypedDictionaryArray<'a, K, V>
929where
930 K: ArrowDictionaryKeyType,
931 V: Sync + Send,
932 &'a V: ArrayAccessor,
933 <&'a V as ArrayAccessor>::Item: Default,
934{
935 type Item = <&'a V as ArrayAccessor>::Item;
936
937 fn value(&self, index: usize) -> Self::Item {
938 assert!(
939 index < self.len(),
940 "Trying to access an element at index {} from a TypedDictionaryArray of length {}",
941 index,
942 self.len()
943 );
944 unsafe { self.value_unchecked(index) }
945 }
946
947 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
948 let val = self.dictionary.keys.value_unchecked(index);
949 let value_idx = val.as_usize();
950
951 match value_idx < self.values.len() {
954 true => self.values.value_unchecked(value_idx),
955 false => Default::default(),
956 }
957 }
958}
959
/// A type-erased interface to a [`DictionaryArray`], usable without knowing
/// the key type.
pub trait AnyDictionaryArray: Array {
    /// Returns the keys as a type-erased array.
    fn keys(&self) -> &dyn Array;

    /// Returns the dictionary values.
    fn values(&self) -> &ArrayRef;

    /// Returns the keys as `usize` indices that are in bounds for the values.
    ///
    /// In the implementation in this file, keys are clamped to
    /// `values.len() - 1` and the call panics if the values are empty.
    fn normalized_keys(&self) -> Vec<usize>;

    /// Returns a new dictionary array with the same keys and the given
    /// `values`.
    fn with_values(&self, values: ArrayRef) -> ArrayRef;
}
1026
1027impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
1028 fn keys(&self) -> &dyn Array {
1029 &self.keys
1030 }
1031
1032 fn values(&self) -> &ArrayRef {
1033 self.values()
1034 }
1035
1036 fn normalized_keys(&self) -> Vec<usize> {
1037 let v_len = self.values().len();
1038 assert_ne!(v_len, 0);
1039 let iter = self.keys().values().iter();
1040 iter.map(|x| x.as_usize().min(v_len - 1)).collect()
1041 }
1042
1043 fn with_values(&self, values: ArrayRef) -> ArrayRef {
1044 Arc::new(self.with_values(values))
1045 }
1046}
1047
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::as_dictionary_array;
    use crate::{Int16Array, Int32Array, Int8Array, RunArray};
    use arrow_buffer::{Buffer, ToByteSlice};

    /// Builds dictionaries from raw `ArrayData`, with and without an offset,
    /// and checks keys, values and lengths.
    #[test]
    fn test_dictionary_array() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int8)
            .len(8)
            .add_buffer(Buffer::from(
                [10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Construct a buffer for the keys
        let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

        // Construct a dictionary array from the above two
        let key_type = DataType::Int16;
        let value_type = DataType::Int8;
        let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
        let dict_data = ArrayData::builder(dict_data_type.clone())
            .len(3)
            .add_buffer(keys.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(3, dict_array.len());

        // Neither the keys nor the values contain nulls
        assert_eq!(0, dict_array.null_count());
        assert_eq!(0, dict_array.values().null_count());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));

        // Same data, but with an offset of 1 and a length of 2
        let dict_data = ArrayData::builder(dict_data_type)
            .len(2)
            .offset(1)
            .add_buffer(keys)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(2, dict_array.len());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
    }

    /// Repeated appends to a primitive dictionary builder reuse keys for
    /// repeated values and produce null keys for `None`.
    #[test]
    fn test_dictionary_builder_append_many() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();

        builder.append(1).unwrap();
        builder.append_n(2, 2).unwrap();
        builder.append_options(None, 2);
        builder.append_options(Some(3), 3);

        let array = builder.finish();

        let values = array
            .values()
            .as_primitive::<UInt32Type>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &[1, 2, 3]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Same as above, for the string dictionary builder.
    #[test]
    fn test_string_dictionary_builder_append_many() {
        let mut builder = StringDictionaryBuilder::<Int8Type>::new();

        builder.append("a").unwrap();
        builder.append_n("b", 2).unwrap();
        builder.append_options(None::<&str>, 2);
        builder.append_options(Some("c"), 3);

        let array = builder.finish();

        let values = array
            .values()
            .as_string::<i32>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &["a", "b", "c"]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Pins the exact `Debug` output, including the trailing newline.
    #[test]
    fn test_dictionary_array_fmt_debug() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
        builder.append(12345678).unwrap();
        builder.append_null();
        builder.append(22345678).unwrap();
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        // Twenty appends of the same value: twenty keys, one dictionary value
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
        for _ in 0..20 {
            builder.append(1).unwrap();
        }
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }

    /// Collecting from iterators of `Option<&str>` and of `&str`.
    #[test]
    fn test_dictionary_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// `lookup_key` returns the key assigned in order of first appearance.
    #[test]
    fn test_dictionary_array_reverse_lookup_key() {
        let test = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("c"), Some(2));

        // Keys follow iteration order: "t3" -> 0, "t2" -> 1, "t1" -> 2, ...
        let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("t1"), Some(2));
        assert_eq!(array.lookup_key("non-existent"), None);
    }

    /// The keys are exposed as a primitive array of the key type.
    #[test]
    fn test_dictionary_keys_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int8, keys.data_type());
        assert_eq!(0, keys.null_count());
        assert_eq!(&[0, 1, 2, 0], keys.values());
    }

    /// `None` items become null keys.
    #[test]
    fn test_dictionary_keys_as_primitive_array_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int32, keys.data_type());
        assert_eq!(3, keys.null_count());

        assert!(keys.is_valid(0));
        assert!(!keys.is_valid(1));
        assert!(keys.is_valid(2));
        assert!(!keys.is_valid(3));
        assert!(!keys.is_valid(4));
        assert!(keys.is_valid(5));

        assert_eq!(0, keys.value(0));
        assert_eq!(1, keys.value(2));
        assert_eq!(0, keys.value(5));
    }

    /// An all-null dictionary still converts to fully valid `ArrayData`.
    #[test]
    fn test_dictionary_all_nulls() {
        let test = vec![None, None, None];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// `keys_iter` yields keys as `usize` and can drive `take_iter` on the
    /// values.
    #[test]
    fn test_dictionary_iter() {
        // Construct a value array
        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
        let keys = Int16Array::from_iter_values([2_i16, 3, 4]);

        // Construct a dictionary array from the above two
        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let mut key_iter = dict_array.keys_iter();
        assert_eq!(2, key_iter.next().unwrap().unwrap());
        assert_eq!(3, key_iter.next().unwrap().unwrap());
        assert_eq!(4, key_iter.next().unwrap().unwrap());
        assert!(key_iter.next().is_none());

        let mut iter = dict_array
            .values()
            .as_any()
            .downcast_ref::<Int8Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter());

        assert_eq!(12, iter.next().unwrap().unwrap());
        assert_eq!(13, iter.next().unwrap().unwrap());
        assert_eq!(14, iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// Null keys come through `keys_iter` / `take_iter` as `None`.
    #[test]
    fn test_dictionary_iter_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let mut iter = array
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .take_iter(array.keys_iter());

        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("b", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// `key(i)` returns the key as `usize`, or `None` for a null slot.
    #[test]
    fn test_dictionary_key() {
        let keys = Int8Array::from(vec![Some(2), None, Some(1)]);
        let values = StringArray::from(vec!["foo", "bar", "baz", "blarg"]);

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.key(0), Some(2));
        assert_eq!(array.key(1), None);
        assert_eq!(array.key(2), Some(1));
    }

    /// A valid keys/values pair constructs successfully and round-trips
    /// through the accessors and `Debug`.
    #[test]
    fn test_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let keys: Int32Array = [Some(0), Some(2), None, Some(1)].into_iter().collect();

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.keys().data_type(), &DataType::Int32);
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);

        assert!(array.keys().is_valid(0));
        assert!(array.keys().is_valid(1));
        assert!(array.keys().is_null(2));
        assert!(array.keys().is_valid(3));

        assert_eq!(array.keys().value(0), 0);
        assert_eq!(array.keys().value(1), 2);
        assert_eq!(array.keys().value(3), 1);

        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int32>\n[\n 0,\n 2,\n null,\n 1,\n] values: StringArray\n[\n \"foo\",\n \"bar\",\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// A key past the end of the values is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
    fn test_try_new_index_too_large() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        // Key 3 is out of range for a two-entry dictionary
        let keys: Int32Array = [Some(0), Some(3)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// A negative key is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
    fn test_try_new_index_too_small() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(-100)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// Converting `ArrayData` with the wrong key type panics.
    #[test]
    #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
    fn test_from_array_data_validation() {
        let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
        let _ = DictionaryArray::<Int64Type>::from(a.into_data());
    }

    /// With no other owners of the buffers, the array converts into a builder
    /// whose values can be mutated in place.
    #[test]
    fn test_into_primitive_dict_builder() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);
        let col: DictionaryArray<Int8Type> = as_dictionary_array(&boxed).clone();

        // Drop the other handle so `col` is the only owner left
        drop(boxed);

        let mut builder = col.into_primitive_dict_builder::<Int32Type>().unwrap();

        let slice = builder.values_slice_mut();
        assert_eq!(slice, &[10, 12, 15]);

        slice[0] = 4;
        slice[1] = 2;
        slice[2] = 1;

        let values = Int32Array::from_iter_values([4_i32, 2, 1]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));

        let new_array = builder.finish();
        assert_eq!(expected, new_array);
    }

    /// While `boxed` is still alive the conversion fails and hands back an
    /// equivalent array.
    #[test]
    fn test_into_primitive_dict_builder_cloned_array() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);

        let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
        let err = col.into_primitive_dict_builder::<Int32Type>();

        let returned = err.unwrap_err();

        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(expected, returned);
    }

    /// `occupancy` marks exactly the values referenced by non-null keys.
    #[test]
    fn test_occupancy() {
        let keys = Int32Array::new((100..200).collect(), None);
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = (100..200).contains(&idx);
            assert_eq!(v, expected, "{idx}");
        }

        // Only every fourth key is valid
        let keys = Int32Array::new(
            (0..100).collect(),
            Some((0..100).map(|x| x % 4 == 0).collect()),
        );
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = idx % 4 == 0 && idx < 100;
            assert_eq!(v, expected, "{idx}");
        }
    }

    /// Iterating a typed dictionary yields `None` both for null keys (even
    /// with an out-of-range key value) and for keys referencing null values.
    #[test]
    fn test_iterator_nulls() {
        let keys = Int32Array::new(
            vec![0, 700, 1, 2].into(),
            Some(NullBuffer::from(vec![true, false, true, true])),
        );
        let values = Int32Array::from(vec![Some(50), None, Some(2)]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        let values: Vec<_> = dict
            .downcast_dict::<Int32Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(values, &[Some(50), None, None, Some(2)])
    }

    /// Logical nulls combine key nulls with the logical nulls of the values
    /// (here a `RunArray`, which reports no physical nulls).
    #[test]
    fn test_logical_nulls() -> Result<(), ArrowError> {
        let values = Arc::new(RunArray::try_new(
            &Int32Array::from(vec![1, 3, 7]),
            &Int32Array::from(vec![Some(1), None, Some(3)]),
        )?) as ArrayRef;

        // No physical nulls, but two logically null slots
        assert_eq!(values.null_count(), 0);
        assert_eq!(values.logical_null_count(), 2);

        // One key per value, all valid
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
            Arc::clone(&values),
        )?;

        assert_eq!(dictionary.null_count(), 0);
        assert_eq!(dictionary.logical_null_count(), values.logical_null_count());
        assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
        assert!(dictionary.is_nullable());

        // Same keys, but with the first key null
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from(
                (0..values.len())
                    .map(|i| i as i8)
                    .map(|i| if i == 0 { None } else { Some(i) })
                    .collect::<Vec<_>>(),
            ),
            Arc::clone(&values),
        )?;

        assert_eq!(dictionary.null_count(), 1);

        // The null key adds one logical null on top of the null values
        assert_eq!(
            dictionary.logical_null_count(),
            values.logical_null_count() + 1
        );
        assert!(dictionary.is_nullable());

        Ok(())
    }

    /// An out-of-range key in a null slot is clamped to the last value index.
    #[test]
    fn test_normalized_keys() {
        let values = vec![132, 0, 1].into();
        let nulls = NullBuffer::from(vec![false, true, true]);
        let keys = Int32Array::new(values, Some(nulls));
        let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2)));
        assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1])
    }
}