1use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
19use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{
21 Array, ArrayRef, DictionaryArray, GenericByteArray, PrimitiveArray, TypedDictionaryArray,
22};
23use arrow_buffer::ArrowNativeType;
24use arrow_schema::{ArrowError, DataType};
25use hashbrown::HashTable;
26use num_traits::NumCast;
27use std::any::Any;
28use std::sync::Arc;
29
30#[derive(Debug)]
36pub struct GenericByteDictionaryBuilder<K, T>
37where
38 K: ArrowDictionaryKeyType,
39 T: ByteArrayType,
40{
41 state: ahash::RandomState,
42 dedup: HashTable<usize>,
43
44 keys_builder: PrimitiveBuilder<K>,
45 values_builder: GenericByteBuilder<T>,
46}
47
48impl<K, T> Default for GenericByteDictionaryBuilder<K, T>
49where
50 K: ArrowDictionaryKeyType,
51 T: ByteArrayType,
52{
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl<K, T> GenericByteDictionaryBuilder<K, T>
59where
60 K: ArrowDictionaryKeyType,
61 T: ByteArrayType,
62{
63 pub fn new() -> Self {
65 let keys_builder = PrimitiveBuilder::new();
66 let values_builder = GenericByteBuilder::<T>::new();
67 Self {
68 state: Default::default(),
69 dedup: HashTable::with_capacity(keys_builder.capacity()),
70 keys_builder,
71 values_builder,
72 }
73 }
74
75 pub fn with_capacity(
81 keys_capacity: usize,
82 value_capacity: usize,
83 data_capacity: usize,
84 ) -> Self {
85 Self {
86 state: Default::default(),
87 dedup: HashTable::with_capacity(value_capacity),
88 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
89 values_builder: GenericByteBuilder::<T>::with_capacity(value_capacity, data_capacity),
90 }
91 }
92
93 pub fn new_with_dictionary(
117 keys_capacity: usize,
118 dictionary_values: &GenericByteArray<T>,
119 ) -> Result<Self, ArrowError> {
120 let state = ahash::RandomState::default();
121 let dict_len = dictionary_values.len();
122
123 let mut dedup = HashTable::with_capacity(dict_len);
124
125 let values_len = dictionary_values.value_data().len();
126 let mut values_builder = GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
127
128 K::Native::from_usize(dictionary_values.len())
129 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
130
131 for (idx, maybe_value) in dictionary_values.iter().enumerate() {
132 match maybe_value {
133 Some(value) => {
134 let value_bytes: &[u8] = value.as_ref();
135 let hash = state.hash_one(value_bytes);
136
137 dedup
138 .entry(
139 hash,
140 |idx: &usize| value_bytes == get_bytes(&values_builder, *idx),
141 |idx: &usize| state.hash_one(get_bytes(&values_builder, *idx)),
142 )
143 .or_insert(idx);
144
145 values_builder.append_value(value);
146 }
147 None => values_builder.append_null(),
148 }
149 }
150
151 Ok(Self {
152 state,
153 dedup,
154 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
155 values_builder,
156 })
157 }
158
159 pub fn try_new_from_builder<K2>(
187 mut source: GenericByteDictionaryBuilder<K2, T>,
188 ) -> Result<Self, ArrowError>
189 where
190 K::Native: NumCast,
191 K2: ArrowDictionaryKeyType,
192 K2::Native: NumCast,
193 {
194 let state = source.state;
195 let dedup = source.dedup;
196 let values_builder = source.values_builder;
197
198 let source_keys = source.keys_builder.finish();
199 let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
200 num_traits::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
201 ArrowError::CastError(format!(
202 "Can't cast dictionary keys from source type {:?} to type {:?}",
203 K2::DATA_TYPE,
204 K::DATA_TYPE
205 ))
206 })
207 })?;
208
209 drop(source_keys);
213
214 Ok(Self {
215 state,
216 dedup,
217 keys_builder: new_keys
218 .into_builder()
219 .expect("underlying buffer has no references"),
220 values_builder,
221 })
222 }
223}
224
225impl<K, T> ArrayBuilder for GenericByteDictionaryBuilder<K, T>
226where
227 K: ArrowDictionaryKeyType,
228 T: ByteArrayType,
229{
230 fn as_any(&self) -> &dyn Any {
232 self
233 }
234
235 fn as_any_mut(&mut self) -> &mut dyn Any {
237 self
238 }
239
240 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
242 self
243 }
244
245 fn len(&self) -> usize {
247 self.keys_builder.len()
248 }
249
250 fn finish(&mut self) -> ArrayRef {
252 Arc::new(self.finish())
253 }
254
255 fn finish_cloned(&self) -> ArrayRef {
257 Arc::new(self.finish_cloned())
258 }
259
260 fn finish_preserve_values(&mut self) -> ArrayRef {
261 Arc::new(self.finish_preserve_values())
262 }
263}
264
265impl<K, T> GenericByteDictionaryBuilder<K, T>
266where
267 K: ArrowDictionaryKeyType,
268 T: ByteArrayType,
269{
270 fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
271 let value_native: &T::Native = value.as_ref();
272 let value_bytes: &[u8] = value_native.as_ref();
273
274 let state = &self.state;
275 let storage = &mut self.values_builder;
276 let hash = state.hash_one(value_bytes);
277
278 let idx = *self
279 .dedup
280 .entry(
281 hash,
282 |idx| value_bytes == get_bytes(storage, *idx),
283 |idx| state.hash_one(get_bytes(storage, *idx)),
284 )
285 .or_insert_with(|| {
286 let idx = storage.len();
287 storage.append_value(value);
288 idx
289 })
290 .get();
291
292 let key = K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?;
293
294 Ok(key)
295 }
296
297 pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
303 let key = self.get_or_insert_key(value)?;
304 self.keys_builder.append_value(key);
305 Ok(key)
306 }
307
308 pub fn append_n(
313 &mut self,
314 value: impl AsRef<T::Native>,
315 count: usize,
316 ) -> Result<K::Native, ArrowError> {
317 let key = self.get_or_insert_key(value)?;
318 self.keys_builder.append_value_n(key, count);
319 Ok(key)
320 }
321
322 pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
328 self.append(value).expect("dictionary key overflow");
329 }
330
331 pub fn append_values(&mut self, value: impl AsRef<T::Native>, count: usize) {
338 self.append_n(value, count)
339 .expect("dictionary key overflow");
340 }
341
342 #[inline]
344 pub fn append_null(&mut self) {
345 self.keys_builder.append_null()
346 }
347
348 #[inline]
350 pub fn append_nulls(&mut self, n: usize) {
351 self.keys_builder.append_nulls(n)
352 }
353
354 #[inline]
360 pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
361 match value {
362 None => self.append_null(),
363 Some(v) => self.append_value(v),
364 };
365 }
366
367 pub fn append_options(&mut self, value: Option<impl AsRef<T::Native>>, count: usize) {
374 match value {
375 None => self.keys_builder.append_nulls(count),
376 Some(v) => self.append_values(v, count),
377 };
378 }
379
380 pub fn extend_dictionary(
388 &mut self,
389 dictionary: &TypedDictionaryArray<K, GenericByteArray<T>>,
390 ) -> Result<(), ArrowError> {
391 let values = dictionary.values();
392
393 let v_len = values.len();
394 let k_len = dictionary.keys().len();
395 if v_len == 0 && k_len == 0 {
396 return Ok(());
397 }
398
399 if v_len == 0 {
401 self.append_nulls(k_len);
402 return Ok(());
403 }
404
405 if k_len == 0 {
406 return Err(ArrowError::InvalidArgumentError(
407 "Dictionary keys should not be empty when values are not empty".to_string(),
408 ));
409 }
410
411 let mapped_values = values
413 .iter()
414 .map(|dict_value| {
416 dict_value
417 .map(|dict_value| self.get_or_insert_key(dict_value))
418 .transpose()
419 })
420 .collect::<Result<Vec<_>, _>>()?;
421
422 dictionary.keys().iter().for_each(|key| match key {
424 None => self.append_null(),
425 Some(original_dict_index) => {
426 let index = original_dict_index.as_usize().min(v_len - 1);
427 match mapped_values[index] {
428 None => self.append_null(),
429 Some(mapped_value) => self.keys_builder.append_value(mapped_value),
430 }
431 }
432 });
433
434 Ok(())
435 }
436
437 pub fn finish(&mut self) -> DictionaryArray<K> {
439 self.dedup.clear();
440 let values = self.values_builder.finish();
441 let keys = self.keys_builder.finish();
442
443 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
444
445 let builder = keys
446 .into_data()
447 .into_builder()
448 .data_type(data_type)
449 .child_data(vec![values.into_data()]);
450
451 DictionaryArray::from(unsafe { builder.build_unchecked() })
452 }
453
454 pub fn finish_cloned(&self) -> DictionaryArray<K> {
456 let values = self.values_builder.finish_cloned();
457 let keys = self.keys_builder.finish_cloned();
458
459 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
460
461 let builder = keys
462 .into_data()
463 .into_builder()
464 .data_type(data_type)
465 .child_data(vec![values.into_data()]);
466
467 DictionaryArray::from(unsafe { builder.build_unchecked() })
468 }
469
470 pub fn finish_preserve_values(&mut self) -> DictionaryArray<K> {
488 let values = self.values_builder.finish_cloned();
489 let keys = self.keys_builder.finish();
490
491 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
492
493 let builder = keys
494 .into_data()
495 .into_builder()
496 .data_type(data_type)
497 .child_data(vec![values.into_data()]);
498
499 DictionaryArray::from(unsafe { builder.build_unchecked() })
500 }
501
502 pub fn validity_slice(&self) -> Option<&[u8]> {
504 self.keys_builder.validity_slice()
505 }
506}
507
508impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>>
509 for GenericByteDictionaryBuilder<K, T>
510{
511 #[inline]
512 fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
513 for v in iter {
514 self.append_option(v)
515 }
516 }
517}
518
519fn get_bytes<T: ByteArrayType>(values: &GenericByteBuilder<T>, idx: usize) -> &[u8] {
520 let offsets = values.offsets_slice();
521 let values = values.values_slice();
522
523 let end_offset = offsets[idx + 1].as_usize();
524 let start_offset = offsets[idx].as_usize();
525
526 &values[start_offset..end_offset]
527}
528
529pub type StringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
562
563pub type LargeStringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
565
566pub type BinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
600
601pub type LargeBinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
603
604#[cfg(test)]
605mod tests {
606 use super::*;
607
608 use crate::array::Int8Array;
609 use crate::cast::AsArray;
610 use crate::types::{Int8Type, Int16Type, Int32Type, UInt8Type, UInt16Type, Utf8Type};
611 use crate::{ArrowPrimitiveType, BinaryArray, StringArray};
612
613 fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
614 where
615 T: ByteArrayType,
616 <T as ByteArrayType>::Native: PartialEq,
617 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
618 {
619 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
620 builder.append(values[0]).unwrap();
621 builder.append_null();
622 builder.append(values[1]).unwrap();
623 builder.append(values[1]).unwrap();
624 builder.append(values[0]).unwrap();
625 let array = builder.finish();
626
627 assert_eq!(
628 array.keys(),
629 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
630 );
631
632 let av = array.values();
634 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
635
636 assert_eq!(*ava.value(0), *values[0]);
637 assert_eq!(*ava.value(1), *values[1]);
638 }
639
640 #[test]
641 fn test_string_dictionary_builder() {
642 test_bytes_dictionary_builder::<GenericStringType<i32>>(vec!["abc", "def"]);
643 }
644
645 #[test]
646 fn test_binary_dictionary_builder() {
647 test_bytes_dictionary_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def"]);
648 }
649
650 #[test]
651 fn test_with_capacity_presizes_dedup() {
652 let value_capacity = 128;
655 let builder =
656 GenericByteDictionaryBuilder::<Int32Type, GenericStringType<i32>>::with_capacity(
657 256,
658 value_capacity,
659 value_capacity * 32,
660 );
661 assert!(
662 builder.dedup.capacity() >= value_capacity,
663 "dedup HashTable not pre-sized: got capacity {}, expected >= {}",
664 builder.dedup.capacity(),
665 value_capacity,
666 );
667 }
668
669 fn test_bytes_dictionary_builder_finish_cloned<T>(values: Vec<&T::Native>)
670 where
671 T: ByteArrayType,
672 <T as ByteArrayType>::Native: PartialEq,
673 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
674 {
675 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
676
677 builder.append(values[0]).unwrap();
678 builder.append_null();
679 builder.append(values[1]).unwrap();
680 builder.append(values[1]).unwrap();
681 builder.append(values[0]).unwrap();
682 let mut array = builder.finish_cloned();
683
684 assert_eq!(
685 array.keys(),
686 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
687 );
688
689 let av = array.values();
691 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
692
693 assert_eq!(ava.value(0), values[0]);
694 assert_eq!(ava.value(1), values[1]);
695
696 builder.append(values[0]).unwrap();
697 builder.append(values[2]).unwrap();
698 builder.append(values[1]).unwrap();
699
700 array = builder.finish();
701
702 assert_eq!(
703 array.keys(),
704 &Int8Array::from(vec![
705 Some(0),
706 None,
707 Some(1),
708 Some(1),
709 Some(0),
710 Some(0),
711 Some(2),
712 Some(1)
713 ])
714 );
715
716 let av2 = array.values();
718 let ava2: &GenericByteArray<T> =
719 av2.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
720
721 assert_eq!(ava2.value(0), values[0]);
722 assert_eq!(ava2.value(1), values[1]);
723 assert_eq!(ava2.value(2), values[2]);
724 }
725
726 #[test]
727 fn test_string_dictionary_builder_finish_cloned() {
728 test_bytes_dictionary_builder_finish_cloned::<GenericStringType<i32>>(vec![
729 "abc", "def", "ghi",
730 ]);
731 }
732
733 #[test]
734 fn test_binary_dictionary_builder_finish_cloned() {
735 test_bytes_dictionary_builder_finish_cloned::<GenericBinaryType<i32>>(vec![
736 b"abc", b"def", b"ghi",
737 ]);
738 }
739
740 fn _test_try_new_from_builder_generic_for_key_types<K1, K2, T>(values: Vec<&T::Native>)
741 where
742 K1: ArrowDictionaryKeyType,
743 K1::Native: NumCast,
744 K2: ArrowDictionaryKeyType,
745 K2::Native: NumCast + From<u8>,
746 T: ByteArrayType,
747 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
748 {
749 let mut source = GenericByteDictionaryBuilder::<K1, T>::new();
750 source.append(values[0]).unwrap();
751 source.append(values[1]).unwrap();
752 source.append_null();
753 source.append(values[2]).unwrap();
754
755 let mut result =
756 GenericByteDictionaryBuilder::<K2, T>::try_new_from_builder(source).unwrap();
757 let array = result.finish();
758
759 let mut expected_keys_builder = PrimitiveBuilder::<K2>::new();
760 expected_keys_builder
761 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(0u8));
762 expected_keys_builder
763 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(1u8));
764 expected_keys_builder.append_null();
765 expected_keys_builder
766 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(2u8));
767 let expected_keys = expected_keys_builder.finish();
768 assert_eq!(array.keys(), &expected_keys);
769
770 let av = array.values();
771 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
772 assert_eq!(ava.value(0), values[0]);
773 assert_eq!(ava.value(1), values[1]);
774 assert_eq!(ava.value(2), values[2]);
775 }
776
777 fn test_try_new_from_builder<T>(values: Vec<&T::Native>)
778 where
779 T: ByteArrayType,
780 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
781 {
782 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, UInt16Type, T>(
784 values.clone(),
785 );
786 _test_try_new_from_builder_generic_for_key_types::<UInt16Type, UInt8Type, T>(
788 values.clone(),
789 );
790 _test_try_new_from_builder_generic_for_key_types::<Int8Type, Int16Type, T>(values.clone());
792 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
794 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, Int16Type, T>(values.clone());
796 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt8Type, T>(values.clone());
797 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt16Type, T>(values.clone());
798 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
799 }
800
801 #[test]
802 fn test_string_dictionary_builder_try_new_from_builder() {
803 test_try_new_from_builder::<GenericStringType<i32>>(vec!["abc", "def", "ghi"]);
804 }
805
806 #[test]
807 fn test_binary_dictionary_builder_try_new_from_builder() {
808 test_try_new_from_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def", b"ghi"]);
809 }
810
811 #[test]
812 fn test_try_new_from_builder_cast_fails() {
813 let mut source_builder = StringDictionaryBuilder::<UInt16Type>::new();
814 for i in 0..257 {
815 source_builder.append_value(format!("val{i}"));
816 }
817
818 let result = StringDictionaryBuilder::<UInt8Type>::try_new_from_builder(source_builder);
821 assert!(result.is_err());
822 if let Err(e) = result {
823 assert!(matches!(e, ArrowError::CastError(_)));
824 assert_eq!(
825 e.to_string(),
826 "Cast error: Can't cast dictionary keys from source type UInt16 to type UInt8"
827 );
828 }
829 }
830
831 fn test_bytes_dictionary_builder_with_existing_dictionary<T>(
832 dictionary: GenericByteArray<T>,
833 values: Vec<&T::Native>,
834 ) where
835 T: ByteArrayType,
836 <T as ByteArrayType>::Native: PartialEq,
837 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
838 {
839 let mut builder =
840 GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(6, &dictionary)
841 .unwrap();
842 builder.append(values[0]).unwrap();
843 builder.append_null();
844 builder.append(values[1]).unwrap();
845 builder.append(values[1]).unwrap();
846 builder.append(values[0]).unwrap();
847 builder.append(values[2]).unwrap();
848 let array = builder.finish();
849
850 assert_eq!(
851 array.keys(),
852 &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)])
853 );
854
855 let av = array.values();
857 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
858
859 assert!(!ava.is_valid(0));
860 assert_eq!(ava.value(1), values[1]);
861 assert_eq!(ava.value(2), values[0]);
862 assert_eq!(ava.value(3), values[2]);
863 }
864
865 #[test]
866 fn test_string_dictionary_builder_with_existing_dictionary() {
867 test_bytes_dictionary_builder_with_existing_dictionary::<GenericStringType<i32>>(
868 StringArray::from(vec![None, Some("def"), Some("abc")]),
869 vec!["abc", "def", "ghi"],
870 );
871 }
872
873 #[test]
874 fn test_binary_dictionary_builder_with_existing_dictionary() {
875 let values: Vec<Option<&[u8]>> = vec![None, Some(b"def"), Some(b"abc")];
876 test_bytes_dictionary_builder_with_existing_dictionary::<GenericBinaryType<i32>>(
877 BinaryArray::from(values),
878 vec![b"abc", b"def", b"ghi"],
879 );
880 }
881
882 fn test_bytes_dictionary_builder_with_reserved_null_value<T>(
883 dictionary: GenericByteArray<T>,
884 values: Vec<&T::Native>,
885 ) where
886 T: ByteArrayType,
887 <T as ByteArrayType>::Native: PartialEq,
888 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
889 {
890 let mut builder =
891 GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(4, &dictionary)
892 .unwrap();
893 builder.append(values[0]).unwrap();
894 builder.append_null();
895 builder.append(values[1]).unwrap();
896 builder.append(values[0]).unwrap();
897 let array = builder.finish();
898
899 assert!(array.is_null(1));
900 assert!(!array.is_valid(1));
901
902 let keys = array.keys();
903
904 assert_eq!(keys.value(0), 1);
905 assert!(keys.is_null(1));
906 assert_eq!(keys.value(1), 0);
908 assert_eq!(keys.value(2), 2);
909 assert_eq!(keys.value(3), 1);
910 }
911
912 #[test]
913 fn test_string_dictionary_builder_with_reserved_null_value() {
914 let v: Vec<Option<&str>> = vec![None];
915 test_bytes_dictionary_builder_with_reserved_null_value::<GenericStringType<i32>>(
916 StringArray::from(v),
917 vec!["abc", "def"],
918 );
919 }
920
921 #[test]
922 fn test_binary_dictionary_builder_with_reserved_null_value() {
923 let values: Vec<Option<&[u8]>> = vec![None];
924 test_bytes_dictionary_builder_with_reserved_null_value::<GenericBinaryType<i32>>(
925 BinaryArray::from(values),
926 vec![b"abc", b"def"],
927 );
928 }
929
930 #[test]
931 fn test_extend() {
932 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
933 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
934 builder.extend(["c", "d", "a"].into_iter().map(Some));
935 let dict = builder.finish();
936 assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
937 assert_eq!(dict.values().len(), 4);
938 }
939
940 #[test]
941 fn test_extend_dictionary() {
942 let some_dict = {
943 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
944 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
945 builder.extend([None::<&str>]);
946 builder.extend(["c", "d", "a"].into_iter().map(Some));
947 builder.append_null();
948 builder.finish()
949 };
950
951 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
952 builder.extend(["e", "e", "f", "e", "d"].into_iter().map(Some));
953 builder
954 .extend_dictionary(&some_dict.downcast_dict().unwrap())
955 .unwrap();
956 let dict = builder.finish();
957
958 assert_eq!(dict.values().len(), 6);
959
960 let values = dict
961 .downcast_dict::<GenericByteArray<Utf8Type>>()
962 .unwrap()
963 .into_iter()
964 .collect::<Vec<_>>();
965
966 assert_eq!(
967 values,
968 [
969 Some("e"),
970 Some("e"),
971 Some("f"),
972 Some("e"),
973 Some("d"),
974 Some("a"),
975 Some("b"),
976 Some("c"),
977 Some("a"),
978 Some("b"),
979 Some("c"),
980 None,
981 Some("c"),
982 Some("d"),
983 Some("a"),
984 None
985 ]
986 );
987 }
988 #[test]
989 fn test_extend_dictionary_with_null_in_mapped_value() {
990 let some_dict = {
991 let mut values_builder = GenericByteBuilder::<Utf8Type>::new();
992 let mut keys_builder = PrimitiveBuilder::<Int32Type>::new();
993
994 values_builder.append_null();
996 keys_builder.append_value(0);
997 values_builder.append_value("I like worm hugs");
998 keys_builder.append_value(1);
999
1000 let values = values_builder.finish();
1001 let keys = keys_builder.finish();
1002
1003 let data_type = DataType::Dictionary(
1004 Box::new(Int32Type::DATA_TYPE),
1005 Box::new(Utf8Type::DATA_TYPE),
1006 );
1007
1008 let builder = keys
1009 .into_data()
1010 .into_builder()
1011 .data_type(data_type)
1012 .child_data(vec![values.into_data()]);
1013
1014 DictionaryArray::from(unsafe { builder.build_unchecked() })
1015 };
1016
1017 let some_dict_values = some_dict.values().as_string::<i32>();
1018 assert_eq!(
1019 some_dict_values.into_iter().collect::<Vec<_>>(),
1020 &[None, Some("I like worm hugs")]
1021 );
1022
1023 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1024 builder
1025 .extend_dictionary(&some_dict.downcast_dict().unwrap())
1026 .unwrap();
1027 let dict = builder.finish();
1028
1029 assert_eq!(dict.values().len(), 1);
1030
1031 let values = dict
1032 .downcast_dict::<GenericByteArray<Utf8Type>>()
1033 .unwrap()
1034 .into_iter()
1035 .collect::<Vec<_>>();
1036
1037 assert_eq!(values, [None, Some("I like worm hugs")]);
1038 }
1039
1040 #[test]
1041 fn test_extend_all_null_dictionary() {
1042 let some_dict = {
1043 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1044 builder.append_nulls(2);
1045 builder.finish()
1046 };
1047
1048 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1049 builder
1050 .extend_dictionary(&some_dict.downcast_dict().unwrap())
1051 .unwrap();
1052 let dict = builder.finish();
1053
1054 assert_eq!(dict.values().len(), 0);
1055
1056 let values = dict
1057 .downcast_dict::<GenericByteArray<Utf8Type>>()
1058 .unwrap()
1059 .into_iter()
1060 .collect::<Vec<_>>();
1061
1062 assert_eq!(values, [None, None]);
1063 }
1064
1065 #[test]
1066 fn test_finish_preserve_values() {
1067 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1069 builder.append("a").unwrap();
1070 builder.append("b").unwrap();
1071 builder.append("c").unwrap();
1072 let dict = builder.finish_preserve_values();
1073 assert_eq!(dict.keys().values(), &[0, 1, 2]);
1074 assert_eq!(dict.values().len(), 3);
1075 let values = dict
1076 .downcast_dict::<GenericByteArray<Utf8Type>>()
1077 .unwrap()
1078 .into_iter()
1079 .collect::<Vec<_>>();
1080 assert_eq!(values, [Some("a"), Some("b"), Some("c")]);
1081
1082 builder.append("d").unwrap();
1084 builder.append("e").unwrap();
1085 let dict2 = builder.finish_preserve_values();
1086
1087 assert_eq!(dict2.keys().values(), &[3, 4]);
1090 let values = dict2
1091 .downcast_dict::<GenericByteArray<Utf8Type>>()
1092 .unwrap()
1093 .into_iter()
1094 .collect::<Vec<_>>();
1095 assert_eq!(values, [Some("d"), Some("e")]);
1096
1097 assert_eq!(dict2.values().len(), 5);
1099 let all_values = dict2
1100 .values()
1101 .as_any()
1102 .downcast_ref::<StringArray>()
1103 .unwrap()
1104 .into_iter()
1105 .collect::<Vec<_>>();
1106 assert_eq!(
1107 all_values,
1108 [Some("a"), Some("b"), Some("c"), Some("d"), Some("e"),]
1109 );
1110 }
1111}