1use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
19use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{
21 Array, ArrayRef, DictionaryArray, GenericByteArray, PrimitiveArray, TypedDictionaryArray,
22};
23use arrow_buffer::ArrowNativeType;
24use arrow_schema::{ArrowError, DataType};
25use hashbrown::HashTable;
26use num::NumCast;
27use std::any::Any;
28use std::sync::Arc;
29
30#[derive(Debug)]
36pub struct GenericByteDictionaryBuilder<K, T>
37where
38 K: ArrowDictionaryKeyType,
39 T: ByteArrayType,
40{
41 state: ahash::RandomState,
42 dedup: HashTable<usize>,
43
44 keys_builder: PrimitiveBuilder<K>,
45 values_builder: GenericByteBuilder<T>,
46}
47
48impl<K, T> Default for GenericByteDictionaryBuilder<K, T>
49where
50 K: ArrowDictionaryKeyType,
51 T: ByteArrayType,
52{
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl<K, T> GenericByteDictionaryBuilder<K, T>
59where
60 K: ArrowDictionaryKeyType,
61 T: ByteArrayType,
62{
63 pub fn new() -> Self {
65 let keys_builder = PrimitiveBuilder::new();
66 let values_builder = GenericByteBuilder::<T>::new();
67 Self {
68 state: Default::default(),
69 dedup: HashTable::with_capacity(keys_builder.capacity()),
70 keys_builder,
71 values_builder,
72 }
73 }
74
75 pub fn with_capacity(
81 keys_capacity: usize,
82 value_capacity: usize,
83 data_capacity: usize,
84 ) -> Self {
85 Self {
86 state: Default::default(),
87 dedup: Default::default(),
88 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
89 values_builder: GenericByteBuilder::<T>::with_capacity(value_capacity, data_capacity),
90 }
91 }
92
93 pub fn new_with_dictionary(
117 keys_capacity: usize,
118 dictionary_values: &GenericByteArray<T>,
119 ) -> Result<Self, ArrowError> {
120 let state = ahash::RandomState::default();
121 let dict_len = dictionary_values.len();
122
123 let mut dedup = HashTable::with_capacity(dict_len);
124
125 let values_len = dictionary_values.value_data().len();
126 let mut values_builder = GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
127
128 K::Native::from_usize(dictionary_values.len())
129 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
130
131 for (idx, maybe_value) in dictionary_values.iter().enumerate() {
132 match maybe_value {
133 Some(value) => {
134 let value_bytes: &[u8] = value.as_ref();
135 let hash = state.hash_one(value_bytes);
136
137 dedup
138 .entry(
139 hash,
140 |idx: &usize| value_bytes == get_bytes(&values_builder, *idx),
141 |idx: &usize| state.hash_one(get_bytes(&values_builder, *idx)),
142 )
143 .or_insert(idx);
144
145 values_builder.append_value(value);
146 }
147 None => values_builder.append_null(),
148 }
149 }
150
151 Ok(Self {
152 state,
153 dedup,
154 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
155 values_builder,
156 })
157 }
158
159 pub fn try_new_from_builder<K2>(
187 mut source: GenericByteDictionaryBuilder<K2, T>,
188 ) -> Result<Self, ArrowError>
189 where
190 K::Native: NumCast,
191 K2: ArrowDictionaryKeyType,
192 K2::Native: NumCast,
193 {
194 let state = source.state;
195 let dedup = source.dedup;
196 let values_builder = source.values_builder;
197
198 let source_keys = source.keys_builder.finish();
199 let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
200 num::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
201 ArrowError::CastError(format!(
202 "Can't cast dictionary keys from source type {:?} to type {:?}",
203 K2::DATA_TYPE,
204 K::DATA_TYPE
205 ))
206 })
207 })?;
208
209 drop(source_keys);
213
214 Ok(Self {
215 state,
216 dedup,
217 keys_builder: new_keys
218 .into_builder()
219 .expect("underlying buffer has no references"),
220 values_builder,
221 })
222 }
223}
224
225impl<K, T> ArrayBuilder for GenericByteDictionaryBuilder<K, T>
226where
227 K: ArrowDictionaryKeyType,
228 T: ByteArrayType,
229{
230 fn as_any(&self) -> &dyn Any {
232 self
233 }
234
235 fn as_any_mut(&mut self) -> &mut dyn Any {
237 self
238 }
239
240 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
242 self
243 }
244
245 fn len(&self) -> usize {
247 self.keys_builder.len()
248 }
249
250 fn finish(&mut self) -> ArrayRef {
252 Arc::new(self.finish())
253 }
254
255 fn finish_cloned(&self) -> ArrayRef {
257 Arc::new(self.finish_cloned())
258 }
259}
260
261impl<K, T> GenericByteDictionaryBuilder<K, T>
262where
263 K: ArrowDictionaryKeyType,
264 T: ByteArrayType,
265{
266 fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
267 let value_native: &T::Native = value.as_ref();
268 let value_bytes: &[u8] = value_native.as_ref();
269
270 let state = &self.state;
271 let storage = &mut self.values_builder;
272 let hash = state.hash_one(value_bytes);
273
274 let idx = *self
275 .dedup
276 .entry(
277 hash,
278 |idx| value_bytes == get_bytes(storage, *idx),
279 |idx| state.hash_one(get_bytes(storage, *idx)),
280 )
281 .or_insert_with(|| {
282 let idx = storage.len();
283 storage.append_value(value);
284 idx
285 })
286 .get();
287
288 let key = K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?;
289
290 Ok(key)
291 }
292
293 pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
299 let key = self.get_or_insert_key(value)?;
300 self.keys_builder.append_value(key);
301 Ok(key)
302 }
303
304 pub fn append_n(
309 &mut self,
310 value: impl AsRef<T::Native>,
311 count: usize,
312 ) -> Result<K::Native, ArrowError> {
313 let key = self.get_or_insert_key(value)?;
314 self.keys_builder.append_value_n(key, count);
315 Ok(key)
316 }
317
318 pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
324 self.append(value).expect("dictionary key overflow");
325 }
326
327 pub fn append_values(&mut self, value: impl AsRef<T::Native>, count: usize) {
334 self.append_n(value, count)
335 .expect("dictionary key overflow");
336 }
337
338 #[inline]
340 pub fn append_null(&mut self) {
341 self.keys_builder.append_null()
342 }
343
344 #[inline]
346 pub fn append_nulls(&mut self, n: usize) {
347 self.keys_builder.append_nulls(n)
348 }
349
350 #[inline]
356 pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
357 match value {
358 None => self.append_null(),
359 Some(v) => self.append_value(v),
360 };
361 }
362
363 pub fn append_options(&mut self, value: Option<impl AsRef<T::Native>>, count: usize) {
370 match value {
371 None => self.keys_builder.append_nulls(count),
372 Some(v) => self.append_values(v, count),
373 };
374 }
375
376 pub fn extend_dictionary(
384 &mut self,
385 dictionary: &TypedDictionaryArray<K, GenericByteArray<T>>,
386 ) -> Result<(), ArrowError> {
387 let values = dictionary.values();
388
389 let v_len = values.len();
390 let k_len = dictionary.keys().len();
391 if v_len == 0 && k_len == 0 {
392 return Ok(());
393 }
394
395 if v_len == 0 {
397 self.append_nulls(k_len);
398 return Ok(());
399 }
400
401 if k_len == 0 {
402 return Err(ArrowError::InvalidArgumentError(
403 "Dictionary keys should not be empty when values are not empty".to_string(),
404 ));
405 }
406
407 let mapped_values = values
409 .iter()
410 .map(|dict_value| {
412 dict_value
413 .map(|dict_value| self.get_or_insert_key(dict_value))
414 .transpose()
415 })
416 .collect::<Result<Vec<_>, _>>()?;
417
418 dictionary.keys().iter().for_each(|key| match key {
420 None => self.append_null(),
421 Some(original_dict_index) => {
422 let index = original_dict_index.as_usize().min(v_len - 1);
423 match mapped_values[index] {
424 None => self.append_null(),
425 Some(mapped_value) => self.keys_builder.append_value(mapped_value),
426 }
427 }
428 });
429
430 Ok(())
431 }
432
433 pub fn finish(&mut self) -> DictionaryArray<K> {
435 self.dedup.clear();
436 let values = self.values_builder.finish();
437 let keys = self.keys_builder.finish();
438
439 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
440
441 let builder = keys
442 .into_data()
443 .into_builder()
444 .data_type(data_type)
445 .child_data(vec![values.into_data()]);
446
447 DictionaryArray::from(unsafe { builder.build_unchecked() })
448 }
449
450 pub fn finish_cloned(&self) -> DictionaryArray<K> {
452 let values = self.values_builder.finish_cloned();
453 let keys = self.keys_builder.finish_cloned();
454
455 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
456
457 let builder = keys
458 .into_data()
459 .into_builder()
460 .data_type(data_type)
461 .child_data(vec![values.into_data()]);
462
463 DictionaryArray::from(unsafe { builder.build_unchecked() })
464 }
465
466 pub fn validity_slice(&self) -> Option<&[u8]> {
468 self.keys_builder.validity_slice()
469 }
470}
471
472impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>>
473 for GenericByteDictionaryBuilder<K, T>
474{
475 #[inline]
476 fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
477 for v in iter {
478 self.append_option(v)
479 }
480 }
481}
482
483fn get_bytes<T: ByteArrayType>(values: &GenericByteBuilder<T>, idx: usize) -> &[u8] {
484 let offsets = values.offsets_slice();
485 let values = values.values_slice();
486
487 let end_offset = offsets[idx + 1].as_usize();
488 let start_offset = offsets[idx].as_usize();
489
490 &values[start_offset..end_offset]
491}
492
493pub type StringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
526
527pub type LargeStringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
529
530pub type BinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
564
565pub type LargeBinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
567
568#[cfg(test)]
569mod tests {
570 use super::*;
571
572 use crate::array::Int8Array;
573 use crate::cast::AsArray;
574 use crate::types::{Int16Type, Int32Type, Int8Type, UInt16Type, UInt8Type, Utf8Type};
575 use crate::{ArrowPrimitiveType, BinaryArray, StringArray};
576
577 fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
578 where
579 T: ByteArrayType,
580 <T as ByteArrayType>::Native: PartialEq,
581 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
582 {
583 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
584 builder.append(values[0]).unwrap();
585 builder.append_null();
586 builder.append(values[1]).unwrap();
587 builder.append(values[1]).unwrap();
588 builder.append(values[0]).unwrap();
589 let array = builder.finish();
590
591 assert_eq!(
592 array.keys(),
593 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
594 );
595
596 let av = array.values();
598 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
599
600 assert_eq!(*ava.value(0), *values[0]);
601 assert_eq!(*ava.value(1), *values[1]);
602 }
603
604 #[test]
605 fn test_string_dictionary_builder() {
606 test_bytes_dictionary_builder::<GenericStringType<i32>>(vec!["abc", "def"]);
607 }
608
609 #[test]
610 fn test_binary_dictionary_builder() {
611 test_bytes_dictionary_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def"]);
612 }
613
614 fn test_bytes_dictionary_builder_finish_cloned<T>(values: Vec<&T::Native>)
615 where
616 T: ByteArrayType,
617 <T as ByteArrayType>::Native: PartialEq,
618 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
619 {
620 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
621
622 builder.append(values[0]).unwrap();
623 builder.append_null();
624 builder.append(values[1]).unwrap();
625 builder.append(values[1]).unwrap();
626 builder.append(values[0]).unwrap();
627 let mut array = builder.finish_cloned();
628
629 assert_eq!(
630 array.keys(),
631 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
632 );
633
634 let av = array.values();
636 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
637
638 assert_eq!(ava.value(0), values[0]);
639 assert_eq!(ava.value(1), values[1]);
640
641 builder.append(values[0]).unwrap();
642 builder.append(values[2]).unwrap();
643 builder.append(values[1]).unwrap();
644
645 array = builder.finish();
646
647 assert_eq!(
648 array.keys(),
649 &Int8Array::from(vec![
650 Some(0),
651 None,
652 Some(1),
653 Some(1),
654 Some(0),
655 Some(0),
656 Some(2),
657 Some(1)
658 ])
659 );
660
661 let av2 = array.values();
663 let ava2: &GenericByteArray<T> =
664 av2.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
665
666 assert_eq!(ava2.value(0), values[0]);
667 assert_eq!(ava2.value(1), values[1]);
668 assert_eq!(ava2.value(2), values[2]);
669 }
670
671 #[test]
672 fn test_string_dictionary_builder_finish_cloned() {
673 test_bytes_dictionary_builder_finish_cloned::<GenericStringType<i32>>(vec![
674 "abc", "def", "ghi",
675 ]);
676 }
677
678 #[test]
679 fn test_binary_dictionary_builder_finish_cloned() {
680 test_bytes_dictionary_builder_finish_cloned::<GenericBinaryType<i32>>(vec![
681 b"abc", b"def", b"ghi",
682 ]);
683 }
684
685 fn _test_try_new_from_builder_generic_for_key_types<K1, K2, T>(values: Vec<&T::Native>)
686 where
687 K1: ArrowDictionaryKeyType,
688 K1::Native: NumCast,
689 K2: ArrowDictionaryKeyType,
690 K2::Native: NumCast + From<u8>,
691 T: ByteArrayType,
692 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
693 {
694 let mut source = GenericByteDictionaryBuilder::<K1, T>::new();
695 source.append(values[0]).unwrap();
696 source.append(values[1]).unwrap();
697 source.append_null();
698 source.append(values[2]).unwrap();
699
700 let mut result =
701 GenericByteDictionaryBuilder::<K2, T>::try_new_from_builder(source).unwrap();
702 let array = result.finish();
703
704 let mut expected_keys_builder = PrimitiveBuilder::<K2>::new();
705 expected_keys_builder
706 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(0u8));
707 expected_keys_builder
708 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(1u8));
709 expected_keys_builder.append_null();
710 expected_keys_builder
711 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(2u8));
712 let expected_keys = expected_keys_builder.finish();
713 assert_eq!(array.keys(), &expected_keys);
714
715 let av = array.values();
716 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
717 assert_eq!(ava.value(0), values[0]);
718 assert_eq!(ava.value(1), values[1]);
719 assert_eq!(ava.value(2), values[2]);
720 }
721
722 fn test_try_new_from_builder<T>(values: Vec<&T::Native>)
723 where
724 T: ByteArrayType,
725 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
726 {
727 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, UInt16Type, T>(
729 values.clone(),
730 );
731 _test_try_new_from_builder_generic_for_key_types::<UInt16Type, UInt8Type, T>(
733 values.clone(),
734 );
735 _test_try_new_from_builder_generic_for_key_types::<Int8Type, Int16Type, T>(values.clone());
737 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
739 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, Int16Type, T>(values.clone());
741 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt8Type, T>(values.clone());
742 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt16Type, T>(values.clone());
743 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
744 }
745
746 #[test]
747 fn test_string_dictionary_builder_try_new_from_builder() {
748 test_try_new_from_builder::<GenericStringType<i32>>(vec!["abc", "def", "ghi"]);
749 }
750
751 #[test]
752 fn test_binary_dictionary_builder_try_new_from_builder() {
753 test_try_new_from_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def", b"ghi"]);
754 }
755
756 #[test]
757 fn test_try_new_from_builder_cast_fails() {
758 let mut source_builder = StringDictionaryBuilder::<UInt16Type>::new();
759 for i in 0..257 {
760 source_builder.append_value(format!("val{i}"));
761 }
762
763 let result = StringDictionaryBuilder::<UInt8Type>::try_new_from_builder(source_builder);
766 assert!(result.is_err());
767 if let Err(e) = result {
768 assert!(matches!(e, ArrowError::CastError(_)));
769 assert_eq!(
770 e.to_string(),
771 "Cast error: Can't cast dictionary keys from source type UInt16 to type UInt8"
772 );
773 }
774 }
775
776 fn test_bytes_dictionary_builder_with_existing_dictionary<T>(
777 dictionary: GenericByteArray<T>,
778 values: Vec<&T::Native>,
779 ) where
780 T: ByteArrayType,
781 <T as ByteArrayType>::Native: PartialEq,
782 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
783 {
784 let mut builder =
785 GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(6, &dictionary)
786 .unwrap();
787 builder.append(values[0]).unwrap();
788 builder.append_null();
789 builder.append(values[1]).unwrap();
790 builder.append(values[1]).unwrap();
791 builder.append(values[0]).unwrap();
792 builder.append(values[2]).unwrap();
793 let array = builder.finish();
794
795 assert_eq!(
796 array.keys(),
797 &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)])
798 );
799
800 let av = array.values();
802 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
803
804 assert!(!ava.is_valid(0));
805 assert_eq!(ava.value(1), values[1]);
806 assert_eq!(ava.value(2), values[0]);
807 assert_eq!(ava.value(3), values[2]);
808 }
809
810 #[test]
811 fn test_string_dictionary_builder_with_existing_dictionary() {
812 test_bytes_dictionary_builder_with_existing_dictionary::<GenericStringType<i32>>(
813 StringArray::from(vec![None, Some("def"), Some("abc")]),
814 vec!["abc", "def", "ghi"],
815 );
816 }
817
818 #[test]
819 fn test_binary_dictionary_builder_with_existing_dictionary() {
820 let values: Vec<Option<&[u8]>> = vec![None, Some(b"def"), Some(b"abc")];
821 test_bytes_dictionary_builder_with_existing_dictionary::<GenericBinaryType<i32>>(
822 BinaryArray::from(values),
823 vec![b"abc", b"def", b"ghi"],
824 );
825 }
826
827 fn test_bytes_dictionary_builder_with_reserved_null_value<T>(
828 dictionary: GenericByteArray<T>,
829 values: Vec<&T::Native>,
830 ) where
831 T: ByteArrayType,
832 <T as ByteArrayType>::Native: PartialEq,
833 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
834 {
835 let mut builder =
836 GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(4, &dictionary)
837 .unwrap();
838 builder.append(values[0]).unwrap();
839 builder.append_null();
840 builder.append(values[1]).unwrap();
841 builder.append(values[0]).unwrap();
842 let array = builder.finish();
843
844 assert!(array.is_null(1));
845 assert!(!array.is_valid(1));
846
847 let keys = array.keys();
848
849 assert_eq!(keys.value(0), 1);
850 assert!(keys.is_null(1));
851 assert_eq!(keys.value(1), 0);
853 assert_eq!(keys.value(2), 2);
854 assert_eq!(keys.value(3), 1);
855 }
856
857 #[test]
858 fn test_string_dictionary_builder_with_reserved_null_value() {
859 let v: Vec<Option<&str>> = vec![None];
860 test_bytes_dictionary_builder_with_reserved_null_value::<GenericStringType<i32>>(
861 StringArray::from(v),
862 vec!["abc", "def"],
863 );
864 }
865
866 #[test]
867 fn test_binary_dictionary_builder_with_reserved_null_value() {
868 let values: Vec<Option<&[u8]>> = vec![None];
869 test_bytes_dictionary_builder_with_reserved_null_value::<GenericBinaryType<i32>>(
870 BinaryArray::from(values),
871 vec![b"abc", b"def"],
872 );
873 }
874
875 #[test]
876 fn test_extend() {
877 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
878 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
879 builder.extend(["c", "d", "a"].into_iter().map(Some));
880 let dict = builder.finish();
881 assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
882 assert_eq!(dict.values().len(), 4);
883 }
884
885 #[test]
886 fn test_extend_dictionary() {
887 let some_dict = {
888 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
889 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
890 builder.extend([None::<&str>]);
891 builder.extend(["c", "d", "a"].into_iter().map(Some));
892 builder.append_null();
893 builder.finish()
894 };
895
896 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
897 builder.extend(["e", "e", "f", "e", "d"].into_iter().map(Some));
898 builder
899 .extend_dictionary(&some_dict.downcast_dict().unwrap())
900 .unwrap();
901 let dict = builder.finish();
902
903 assert_eq!(dict.values().len(), 6);
904
905 let values = dict
906 .downcast_dict::<GenericByteArray<Utf8Type>>()
907 .unwrap()
908 .into_iter()
909 .collect::<Vec<_>>();
910
911 assert_eq!(
912 values,
913 [
914 Some("e"),
915 Some("e"),
916 Some("f"),
917 Some("e"),
918 Some("d"),
919 Some("a"),
920 Some("b"),
921 Some("c"),
922 Some("a"),
923 Some("b"),
924 Some("c"),
925 None,
926 Some("c"),
927 Some("d"),
928 Some("a"),
929 None
930 ]
931 );
932 }
933 #[test]
934 fn test_extend_dictionary_with_null_in_mapped_value() {
935 let some_dict = {
936 let mut values_builder = GenericByteBuilder::<Utf8Type>::new();
937 let mut keys_builder = PrimitiveBuilder::<Int32Type>::new();
938
939 values_builder.append_null();
941 keys_builder.append_value(0);
942 values_builder.append_value("I like worm hugs");
943 keys_builder.append_value(1);
944
945 let values = values_builder.finish();
946 let keys = keys_builder.finish();
947
948 let data_type = DataType::Dictionary(
949 Box::new(Int32Type::DATA_TYPE),
950 Box::new(Utf8Type::DATA_TYPE),
951 );
952
953 let builder = keys
954 .into_data()
955 .into_builder()
956 .data_type(data_type)
957 .child_data(vec![values.into_data()]);
958
959 DictionaryArray::from(unsafe { builder.build_unchecked() })
960 };
961
962 let some_dict_values = some_dict.values().as_string::<i32>();
963 assert_eq!(
964 some_dict_values.into_iter().collect::<Vec<_>>(),
965 &[None, Some("I like worm hugs")]
966 );
967
968 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
969 builder
970 .extend_dictionary(&some_dict.downcast_dict().unwrap())
971 .unwrap();
972 let dict = builder.finish();
973
974 assert_eq!(dict.values().len(), 1);
975
976 let values = dict
977 .downcast_dict::<GenericByteArray<Utf8Type>>()
978 .unwrap()
979 .into_iter()
980 .collect::<Vec<_>>();
981
982 assert_eq!(values, [None, Some("I like worm hugs")]);
983 }
984
985 #[test]
986 fn test_extend_all_null_dictionary() {
987 let some_dict = {
988 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
989 builder.append_nulls(2);
990 builder.finish()
991 };
992
993 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
994 builder
995 .extend_dictionary(&some_dict.downcast_dict().unwrap())
996 .unwrap();
997 let dict = builder.finish();
998
999 assert_eq!(dict.values().len(), 0);
1000
1001 let values = dict
1002 .downcast_dict::<GenericByteArray<Utf8Type>>()
1003 .unwrap()
1004 .into_iter()
1005 .collect::<Vec<_>>();
1006
1007 assert_eq!(values, [None, None]);
1008 }
1009}