1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 match to_type {
32 Dictionary(to_index_type, to_value_type) => {
33 let dict_array = array
34 .as_any()
35 .downcast_ref::<DictionaryArray<K>>()
36 .ok_or_else(|| {
37 ArrowError::ComputeError(
38 "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
39 )
40 })?;
41
42 let keys_array: ArrayRef =
43 Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
44 let values_array = dict_array.values();
45 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
46 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
47
48 if cast_keys.null_count() > keys_array.null_count() {
51 return Err(ArrowError::ComputeError(format!(
52 "Could not convert {} dictionary indexes from {:?} to {:?}",
53 cast_keys.null_count() - keys_array.null_count(),
54 keys_array.data_type(),
55 to_index_type
56 )));
57 }
58
59 let data = cast_keys.into_data();
60 let builder = data
61 .into_builder()
62 .data_type(to_type.clone())
63 .child_data(vec![cast_values.into_data()]);
64
65 let data = unsafe { builder.build_unchecked() };
68
69 let new_array: ArrayRef = match **to_index_type {
71 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
72 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
73 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
74 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
75 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
76 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
77 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
78 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
79 _ => {
80 return Err(ArrowError::CastError(format!(
81 "Unsupported type {to_index_type:?} for dictionary index"
82 )));
83 }
84 };
85
86 Ok(new_array)
87 }
88 Utf8View => {
89 let dict_array = array
92 .as_dictionary::<K>()
93 .downcast_dict::<StringArray>()
94 .ok_or_else(|| {
95 ArrowError::ComputeError(
96 "Internal Error: Cannot cast Utf8View to StringArray of expected type"
97 .to_string(),
98 )
99 })?;
100
101 let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
102 dict_array.values(),
103 dict_array.keys(),
104 )?;
105 Ok(Arc::new(string_view))
106 }
107 BinaryView => {
108 let dict_array = array
111 .as_dictionary::<K>()
112 .downcast_dict::<BinaryArray>()
113 .ok_or_else(|| {
114 ArrowError::ComputeError(
115 "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
116 .to_string(),
117 )
118 })?;
119
120 let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
121 dict_array.values(),
122 dict_array.keys(),
123 )?;
124 Ok(Arc::new(binary_view))
125 }
126 _ => unpack_dictionary::<K>(array, to_type, cast_options),
127 }
128}
129
130fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
131 array: &GenericByteArray<V>,
132 keys: &PrimitiveArray<K>,
133) -> Result<GenericByteViewArray<T>, ArrowError> {
134 let value_buffer = array.values();
135 let value_offsets = array.value_offsets();
136 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
137 builder.append_block(value_buffer.clone());
138 for i in keys.iter() {
139 match i {
140 Some(v) => {
141 let idx = v.to_usize().ok_or_else(|| {
142 ArrowError::ComputeError("Invalid dictionary index".to_string())
143 })?;
144
145 unsafe {
149 let offset = value_offsets.get_unchecked(idx).as_usize();
150 let end = value_offsets.get_unchecked(idx + 1).as_usize();
151 let length = end - offset;
152 builder.append_view_unchecked(0, offset as u32, length as u32)
153 }
154 }
155 None => {
156 builder.append_null();
157 }
158 }
159 }
160 Ok(builder.finish())
161}
162
163pub(crate) fn unpack_dictionary<K>(
165 array: &dyn Array,
166 to_type: &DataType,
167 cast_options: &CastOptions,
168) -> Result<ArrayRef, ArrowError>
169where
170 K: ArrowDictionaryKeyType,
171{
172 let dict_array = array.as_dictionary::<K>();
173 let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
174 take(cast_dict_values.as_ref(), dict_array.keys(), None)
175}
176
177pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
179 array: &dyn Array,
180 primitive_type: DataType,
181 dict_value_type: &DataType,
182 cast_options: &CastOptions,
183) -> Result<ArrayRef, ArrowError> {
184 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
185 let dict = cast_with_options(
186 primitive.as_ref(),
187 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
188 cast_options,
189 )?;
190 cast_with_options(
191 dict.as_ref(),
192 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
193 cast_options,
194 )
195}
196
197pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
202 array: &dyn Array,
203 dict_value_type: &DataType,
204 cast_options: &CastOptions,
205) -> Result<ArrayRef, ArrowError> {
206 use DataType::*;
207
208 match *dict_value_type {
209 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
210 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
211 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
212 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
213 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
214 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
215 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
216 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
217 Decimal32(p, s) => pack_decimal_to_dictionary::<K, Decimal32Type>(
218 array,
219 dict_value_type,
220 p,
221 s,
222 cast_options,
223 ),
224 Decimal64(p, s) => pack_decimal_to_dictionary::<K, Decimal64Type>(
225 array,
226 dict_value_type,
227 p,
228 s,
229 cast_options,
230 ),
231 Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
232 array,
233 dict_value_type,
234 p,
235 s,
236 cast_options,
237 ),
238 Decimal256(p, s) => pack_decimal_to_dictionary::<K, Decimal256Type>(
239 array,
240 dict_value_type,
241 p,
242 s,
243 cast_options,
244 ),
245 Float16 => {
246 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
247 }
248 Float32 => {
249 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
250 }
251 Float64 => {
252 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
253 }
254 Date32 => pack_array_to_dictionary_via_primitive::<K>(
255 array,
256 DataType::Int32,
257 dict_value_type,
258 cast_options,
259 ),
260 Date64 => pack_array_to_dictionary_via_primitive::<K>(
261 array,
262 DataType::Int64,
263 dict_value_type,
264 cast_options,
265 ),
266 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
267 array,
268 DataType::Int32,
269 dict_value_type,
270 cast_options,
271 ),
272 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
273 array,
274 DataType::Int64,
275 dict_value_type,
276 cast_options,
277 ),
278 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
279 array,
280 DataType::Int64,
281 dict_value_type,
282 cast_options,
283 ),
284 Utf8 => {
285 if array.data_type() == &DataType::Utf8View {
287 return string_view_to_dictionary::<K, i32>(array);
288 }
289 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
290 }
291 LargeUtf8 => {
292 if array.data_type() == &DataType::Utf8View {
294 return string_view_to_dictionary::<K, i64>(array);
295 }
296 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
297 }
298 Binary => {
299 if array.data_type() == &DataType::BinaryView {
301 return binary_view_to_dictionary::<K, i32>(array);
302 }
303 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
304 }
305 LargeBinary => {
306 if array.data_type() == &DataType::BinaryView {
308 return binary_view_to_dictionary::<K, i64>(array);
309 }
310 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
311 }
312 FixedSizeBinary(byte_size) => {
313 pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
314 }
315 _ => Err(ArrowError::CastError(format!(
316 "Unsupported output type for dictionary packing: {dict_value_type:?}"
317 ))),
318 }
319}
320
321pub(crate) fn pack_numeric_to_dictionary<K, V>(
324 array: &dyn Array,
325 dict_value_type: &DataType,
326 cast_options: &CastOptions,
327) -> Result<ArrayRef, ArrowError>
328where
329 K: ArrowDictionaryKeyType,
330 V: ArrowPrimitiveType,
331{
332 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
334 let values = cast_values.as_primitive::<V>();
335
336 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
337
338 for i in 0..values.len() {
340 if values.is_null(i) {
341 b.append_null();
342 } else {
343 b.append(values.value(i))?;
344 }
345 }
346 Ok(Arc::new(b.finish()))
347}
348
349pub(crate) fn pack_decimal_to_dictionary<K, D>(
350 array: &dyn Array,
351 dict_value_type: &DataType,
352 precision: u8,
353 scale: i8,
354 cast_options: &CastOptions,
355) -> Result<ArrayRef, ArrowError>
356where
357 K: ArrowDictionaryKeyType,
358 D: DecimalType + ArrowPrimitiveType,
359{
360 let dict = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
361 let dict = dict
362 .as_dictionary::<K>()
363 .downcast_dict::<PrimitiveArray<D>>()
364 .ok_or_else(|| {
365 ArrowError::ComputeError(format!(
366 "Internal Error: Cannot cast dict to {}Array",
367 D::PREFIX
368 ))
369 })?;
370 let value = dict.values().clone();
371 let value = value.with_precision_and_scale(precision, scale)?;
373 Ok(Arc::new(DictionaryArray::<K>::try_new(
374 dict.keys().clone(),
375 Arc::new(value),
376 )?))
377}
378
379pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
380 array: &dyn Array,
381) -> Result<ArrayRef, ArrowError>
382where
383 K: ArrowDictionaryKeyType,
384{
385 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
386 array.len(),
387 1024,
388 1024,
389 );
390 let string_view = array
391 .as_any()
392 .downcast_ref::<StringViewArray>()
393 .ok_or_else(|| {
394 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
395 })?;
396 for v in string_view.iter() {
397 match v {
398 Some(v) => {
399 b.append(v)?;
400 }
401 None => {
402 b.append_null();
403 }
404 }
405 }
406
407 Ok(Arc::new(b.finish()))
408}
409
410pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
411 array: &dyn Array,
412) -> Result<ArrayRef, ArrowError>
413where
414 K: ArrowDictionaryKeyType,
415{
416 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
417 array.len(),
418 1024,
419 1024,
420 );
421 let binary_view = array
422 .as_any()
423 .downcast_ref::<BinaryViewArray>()
424 .ok_or_else(|| {
425 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
426 })?;
427 for v in binary_view.iter() {
428 match v {
429 Some(v) => {
430 b.append(v)?;
431 }
432 None => {
433 b.append_null();
434 }
435 }
436 }
437
438 Ok(Arc::new(b.finish()))
439}
440
441pub(crate) fn pack_byte_to_dictionary<K, T>(
444 array: &dyn Array,
445 cast_options: &CastOptions,
446) -> Result<ArrayRef, ArrowError>
447where
448 K: ArrowDictionaryKeyType,
449 T: ByteArrayType,
450{
451 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
452 let values = cast_values
453 .as_any()
454 .downcast_ref::<GenericByteArray<T>>()
455 .ok_or_else(|| {
456 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
457 })?;
458 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
459
460 for i in 0..values.len() {
462 if values.is_null(i) {
463 b.append_null();
464 } else {
465 b.append(values.value(i))?;
466 }
467 }
468 Ok(Arc::new(b.finish()))
469}
470
471pub(crate) fn pack_byte_to_fixed_size_dictionary<K>(
474 array: &dyn Array,
475 cast_options: &CastOptions,
476 byte_width: i32,
477) -> Result<ArrayRef, ArrowError>
478where
479 K: ArrowDictionaryKeyType,
480{
481 let cast_values =
482 cast_with_options(array, &DataType::FixedSizeBinary(byte_width), cast_options)?;
483 let values = cast_values
484 .as_any()
485 .downcast_ref::<FixedSizeBinaryArray>()
486 .ok_or_else(|| {
487 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
488 })?;
489 let mut b = FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(1024, 1024, byte_width);
490
491 for i in 0..values.len() {
493 if values.is_null(i) {
494 b.append_null();
495 } else {
496 b.append(values.value(i))?;
497 }
498 }
499 Ok(Arc::new(b.finish()))
500}