1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 let array = array.as_dictionary::<K>();
32 let from_child_type = array.values().data_type();
33 match (from_child_type, to_type) {
34 (_, Dictionary(to_index_type, to_value_type)) => {
35 dictionary_to_dictionary_cast(array, to_index_type, to_value_type, cast_options)
36 }
37 (Utf8, Utf8View) => view_from_dict_values::<K, Utf8Type, StringViewType>(
43 array.keys(),
44 array.values().as_string::<i32>(),
45 ),
46 (Binary, BinaryView) => view_from_dict_values::<K, BinaryType, BinaryViewType>(
47 array.keys(),
48 array.values().as_binary::<i32>(),
49 ),
50 _ => unpack_dictionary(array, to_type, cast_options),
51 }
52}
53
54fn dictionary_to_dictionary_cast<K: ArrowDictionaryKeyType>(
55 array: &DictionaryArray<K>,
56 to_index_type: &DataType,
57 to_value_type: &DataType,
58 cast_options: &CastOptions,
59) -> Result<ArrayRef, ArrowError> {
60 use DataType::*;
61
62 if matches!(array.values().data_type(), Dictionary(_, _)) {
69 let flattened = take(array.values().as_ref(), array.keys(), None)?;
70 return cast_with_options(
71 &flattened,
72 &Dictionary(
73 Box::new(to_index_type.clone()),
74 Box::new(to_value_type.clone()),
75 ),
76 cast_options,
77 );
78 }
79
80 let keys_array: ArrayRef = Arc::new(PrimitiveArray::<K>::from(array.keys().to_data()));
81 let values_array = array.values();
82 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
83 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
84
85 if cast_keys.null_count() > keys_array.null_count() {
88 return Err(ArrowError::ComputeError(format!(
89 "Could not convert {} dictionary indexes from {:?} to {:?}",
90 cast_keys.null_count() - keys_array.null_count(),
91 keys_array.data_type(),
92 to_index_type
93 )));
94 }
95
96 let data = cast_keys.into_data();
97 let builder = data
98 .into_builder()
99 .data_type(Dictionary(
100 Box::new(to_index_type.clone()),
101 Box::new(to_value_type.clone()),
102 ))
103 .child_data(vec![cast_values.into_data()]);
104
105 let data = unsafe { builder.build_unchecked() };
108
109 let new_array: ArrayRef = match to_index_type {
111 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
112 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
113 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
114 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
115 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
116 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
117 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
118 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
119 _ => {
120 return Err(ArrowError::CastError(format!(
121 "Unsupported type {to_index_type} for dictionary index"
122 )));
123 }
124 };
125
126 Ok(new_array)
127}
128
129fn view_from_dict_values<K: ArrowDictionaryKeyType, V: ByteArrayType, T: ByteViewType>(
130 keys: &PrimitiveArray<K>,
131 values: &GenericByteArray<V>,
132) -> Result<ArrayRef, ArrowError> {
133 let value_buffer = values.values();
134 let value_offsets = values.value_offsets();
135 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
136 builder.append_block(value_buffer.clone());
137 for i in keys.iter() {
138 match i {
139 Some(v) => {
140 let idx = v.to_usize().ok_or_else(|| {
141 ArrowError::ComputeError("Invalid dictionary index".to_string())
142 })?;
143
144 unsafe {
148 let offset = value_offsets.get_unchecked(idx).as_usize();
149 let end = value_offsets.get_unchecked(idx + 1).as_usize();
150 let length = end - offset;
151 builder.append_view_unchecked(0, offset as u32, length as u32)
152 }
153 }
154 None => {
155 builder.append_null();
156 }
157 }
158 }
159 Ok(Arc::new(builder.finish()))
160}
161
162pub(crate) fn unpack_dictionary<K: ArrowDictionaryKeyType>(
164 array: &DictionaryArray<K>,
165 to_type: &DataType,
166 cast_options: &CastOptions,
167) -> Result<ArrayRef, ArrowError> {
168 let cast_dict_values = cast_with_options(array.values(), to_type, cast_options)?;
169 take(cast_dict_values.as_ref(), array.keys(), None)
170}
171
172pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
174 array: &dyn Array,
175 primitive_type: DataType,
176 dict_value_type: &DataType,
177 cast_options: &CastOptions,
178) -> Result<ArrayRef, ArrowError> {
179 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
180 let dict = cast_with_options(
181 primitive.as_ref(),
182 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
183 cast_options,
184 )?;
185 cast_with_options(
186 dict.as_ref(),
187 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
188 cast_options,
189 )
190}
191
192pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
197 array: &dyn Array,
198 dict_value_type: &DataType,
199 cast_options: &CastOptions,
200) -> Result<ArrayRef, ArrowError> {
201 use DataType::*;
202
203 match *dict_value_type {
204 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
205 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
206 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
207 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
208 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
209 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
210 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
211 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
212 Decimal32(p, s) => pack_decimal_to_dictionary::<K, Decimal32Type>(
213 array,
214 dict_value_type,
215 p,
216 s,
217 cast_options,
218 ),
219 Decimal64(p, s) => pack_decimal_to_dictionary::<K, Decimal64Type>(
220 array,
221 dict_value_type,
222 p,
223 s,
224 cast_options,
225 ),
226 Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
227 array,
228 dict_value_type,
229 p,
230 s,
231 cast_options,
232 ),
233 Decimal256(p, s) => pack_decimal_to_dictionary::<K, Decimal256Type>(
234 array,
235 dict_value_type,
236 p,
237 s,
238 cast_options,
239 ),
240 Float16 => {
241 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
242 }
243 Float32 => {
244 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
245 }
246 Float64 => {
247 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
248 }
249 Date32 => pack_array_to_dictionary_via_primitive::<K>(
250 array,
251 DataType::Int32,
252 dict_value_type,
253 cast_options,
254 ),
255 Date64 => pack_array_to_dictionary_via_primitive::<K>(
256 array,
257 DataType::Int64,
258 dict_value_type,
259 cast_options,
260 ),
261 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
262 array,
263 DataType::Int32,
264 dict_value_type,
265 cast_options,
266 ),
267 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
268 array,
269 DataType::Int64,
270 dict_value_type,
271 cast_options,
272 ),
273 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
274 array,
275 DataType::Int64,
276 dict_value_type,
277 cast_options,
278 ),
279 Utf8 => {
280 if array.data_type() == &DataType::Utf8View {
282 return string_view_to_dictionary::<K, i32>(array);
283 }
284 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
285 }
286 LargeUtf8 => {
287 if array.data_type() == &DataType::Utf8View {
289 return string_view_to_dictionary::<K, i64>(array);
290 }
291 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
292 }
293 Utf8View => {
294 let base_value_type = match array.data_type() {
295 DataType::LargeUtf8 | DataType::Utf8View => DataType::LargeUtf8,
296 _ => DataType::Utf8,
297 };
298
299 let dict_base = cast_to_dictionary::<K>(array, &base_value_type, cast_options)?;
300 dictionary_cast::<K>(
301 dict_base.as_ref(),
302 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8View)),
303 cast_options,
304 )
305 }
306 Binary => {
307 if array.data_type() == &DataType::BinaryView {
309 return binary_view_to_dictionary::<K, i32>(array);
310 }
311 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
312 }
313 LargeBinary => {
314 if array.data_type() == &DataType::BinaryView {
316 return binary_view_to_dictionary::<K, i64>(array);
317 }
318 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
319 }
320 BinaryView => {
321 let base_value_type = match array.data_type() {
322 DataType::LargeBinary | DataType::BinaryView => DataType::LargeBinary,
323 _ => DataType::Binary,
324 };
325
326 let dict_base = cast_to_dictionary::<K>(array, &base_value_type, cast_options)?;
327 dictionary_cast::<K>(
328 dict_base.as_ref(),
329 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::BinaryView)),
330 cast_options,
331 )
332 }
333 FixedSizeBinary(byte_size) => {
334 pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
335 }
336 Struct(_) => pack_struct_to_dictionary::<K>(array, dict_value_type, cast_options),
337 _ => Err(ArrowError::CastError(format!(
338 "Unsupported output type for dictionary packing: {dict_value_type}"
339 ))),
340 }
341}
342
343fn pack_struct_to_dictionary<K: ArrowDictionaryKeyType>(
353 array: &dyn Array,
354 dict_value_type: &DataType,
355 cast_options: &CastOptions,
356) -> Result<ArrayRef, ArrowError> {
357 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
358 let len = cast_values.len();
359
360 let mut builder = PrimitiveBuilder::<K>::with_capacity(len);
363 for i in 0..len {
364 if cast_values.is_null(i) {
365 builder.append_null();
366 } else {
367 let key = K::Native::from_usize(i).ok_or_else(|| {
368 ArrowError::CastError(format!(
369 "Cannot fit {len} dictionary keys in {:?}",
370 K::DATA_TYPE,
371 ))
372 })?;
373 builder.append_value(key);
374 }
375 }
376 let keys = builder.finish();
377
378 Ok(Arc::new(DictionaryArray::<K>::try_new(keys, cast_values)?))
379}
380
381pub(crate) fn pack_numeric_to_dictionary<K, V>(
384 array: &dyn Array,
385 dict_value_type: &DataType,
386 cast_options: &CastOptions,
387) -> Result<ArrayRef, ArrowError>
388where
389 K: ArrowDictionaryKeyType,
390 V: ArrowPrimitiveType,
391{
392 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
394 let values = cast_values.as_primitive::<V>();
395
396 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
397
398 for i in 0..values.len() {
400 if values.is_null(i) {
401 b.append_null();
402 } else {
403 b.append(values.value(i))?;
404 }
405 }
406 Ok(Arc::new(b.finish()))
407}
408
409pub(crate) fn pack_decimal_to_dictionary<K, D>(
410 array: &dyn Array,
411 dict_value_type: &DataType,
412 precision: u8,
413 scale: i8,
414 cast_options: &CastOptions,
415) -> Result<ArrayRef, ArrowError>
416where
417 K: ArrowDictionaryKeyType,
418 D: DecimalType + ArrowPrimitiveType,
419{
420 let dict = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
421 let dict = dict.as_dictionary::<K>();
422 let typed = dict.downcast_dict::<PrimitiveArray<D>>().ok_or_else(|| {
423 ArrowError::ComputeError(format!(
424 "Internal Error: Cannot cast dict to {}Array",
425 D::PREFIX
426 ))
427 })?;
428 let value = typed
429 .values()
430 .clone()
431 .with_precision_and_scale(precision, scale)?;
432 Ok(Arc::new(dict.with_values(Arc::new(value))))
433}
434
435pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
436 array: &dyn Array,
437) -> Result<ArrayRef, ArrowError>
438where
439 K: ArrowDictionaryKeyType,
440{
441 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
442 array.len(),
443 1024,
444 1024,
445 );
446 let string_view = array
447 .as_any()
448 .downcast_ref::<StringViewArray>()
449 .ok_or_else(|| {
450 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
451 })?;
452 for v in string_view.iter() {
453 match v {
454 Some(v) => {
455 b.append(v)?;
456 }
457 None => {
458 b.append_null();
459 }
460 }
461 }
462
463 Ok(Arc::new(b.finish()))
464}
465
466pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
467 array: &dyn Array,
468) -> Result<ArrayRef, ArrowError>
469where
470 K: ArrowDictionaryKeyType,
471{
472 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
473 array.len(),
474 1024,
475 1024,
476 );
477 let binary_view = array
478 .as_any()
479 .downcast_ref::<BinaryViewArray>()
480 .ok_or_else(|| {
481 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
482 })?;
483 for v in binary_view.iter() {
484 match v {
485 Some(v) => {
486 b.append(v)?;
487 }
488 None => {
489 b.append_null();
490 }
491 }
492 }
493
494 Ok(Arc::new(b.finish()))
495}
496
497pub(crate) fn pack_byte_to_dictionary<K, T>(
500 array: &dyn Array,
501 cast_options: &CastOptions,
502) -> Result<ArrayRef, ArrowError>
503where
504 K: ArrowDictionaryKeyType,
505 T: ByteArrayType,
506{
507 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
508 let values = cast_values
509 .as_any()
510 .downcast_ref::<GenericByteArray<T>>()
511 .ok_or_else(|| {
512 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
513 })?;
514 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
515
516 for i in 0..values.len() {
518 if values.is_null(i) {
519 b.append_null();
520 } else {
521 b.append(values.value(i))?;
522 }
523 }
524 Ok(Arc::new(b.finish()))
525}
526
527pub(crate) fn pack_byte_to_fixed_size_dictionary<K>(
530 array: &dyn Array,
531 cast_options: &CastOptions,
532 byte_width: i32,
533) -> Result<ArrayRef, ArrowError>
534where
535 K: ArrowDictionaryKeyType,
536{
537 let cast_values =
538 cast_with_options(array, &DataType::FixedSizeBinary(byte_width), cast_options)?;
539 let values = cast_values
540 .as_any()
541 .downcast_ref::<FixedSizeBinaryArray>()
542 .ok_or_else(|| {
543 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
544 })?;
545 let mut b = FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(1024, 1024, byte_width);
546
547 for i in 0..values.len() {
549 if values.is_null(i) {
550 b.append_null();
551 } else {
552 b.append(values.value(i))?;
553 }
554 }
555 Ok(Arc::new(b.finish()))
556}