1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 let array = array.as_dictionary::<K>();
32 let from_child_type = array.values().data_type();
33 match (from_child_type, to_type) {
34 (_, Dictionary(to_index_type, to_value_type)) => {
35 dictionary_to_dictionary_cast(array, to_index_type, to_value_type, cast_options)
36 }
37 (Utf8, Utf8View) => view_from_dict_values::<K, Utf8Type, StringViewType>(
43 array.keys(),
44 array.values().as_string::<i32>(),
45 ),
46 (Binary, BinaryView) => view_from_dict_values::<K, BinaryType, BinaryViewType>(
47 array.keys(),
48 array.values().as_binary::<i32>(),
49 ),
50 _ => unpack_dictionary(array, to_type, cast_options),
51 }
52}
53
54fn dictionary_to_dictionary_cast<K: ArrowDictionaryKeyType>(
55 array: &DictionaryArray<K>,
56 to_index_type: &DataType,
57 to_value_type: &DataType,
58 cast_options: &CastOptions,
59) -> Result<ArrayRef, ArrowError> {
60 use DataType::*;
61
62 let keys_array: ArrayRef = Arc::new(PrimitiveArray::<K>::from(array.keys().to_data()));
63 let values_array = array.values();
64 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
65 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
66
67 if cast_keys.null_count() > keys_array.null_count() {
70 return Err(ArrowError::ComputeError(format!(
71 "Could not convert {} dictionary indexes from {:?} to {:?}",
72 cast_keys.null_count() - keys_array.null_count(),
73 keys_array.data_type(),
74 to_index_type
75 )));
76 }
77
78 let data = cast_keys.into_data();
79 let builder = data
80 .into_builder()
81 .data_type(Dictionary(
82 Box::new(to_index_type.clone()),
83 Box::new(to_value_type.clone()),
84 ))
85 .child_data(vec![cast_values.into_data()]);
86
87 let data = unsafe { builder.build_unchecked() };
90
91 let new_array: ArrayRef = match to_index_type {
93 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
94 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
95 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
96 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
97 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
98 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
99 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
100 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
101 _ => {
102 return Err(ArrowError::CastError(format!(
103 "Unsupported type {to_index_type} for dictionary index"
104 )));
105 }
106 };
107
108 Ok(new_array)
109}
110
111fn view_from_dict_values<K: ArrowDictionaryKeyType, V: ByteArrayType, T: ByteViewType>(
112 keys: &PrimitiveArray<K>,
113 values: &GenericByteArray<V>,
114) -> Result<ArrayRef, ArrowError> {
115 let value_buffer = values.values();
116 let value_offsets = values.value_offsets();
117 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
118 builder.append_block(value_buffer.clone());
119 for i in keys.iter() {
120 match i {
121 Some(v) => {
122 let idx = v.to_usize().ok_or_else(|| {
123 ArrowError::ComputeError("Invalid dictionary index".to_string())
124 })?;
125
126 unsafe {
130 let offset = value_offsets.get_unchecked(idx).as_usize();
131 let end = value_offsets.get_unchecked(idx + 1).as_usize();
132 let length = end - offset;
133 builder.append_view_unchecked(0, offset as u32, length as u32)
134 }
135 }
136 None => {
137 builder.append_null();
138 }
139 }
140 }
141 Ok(Arc::new(builder.finish()))
142}
143
144pub(crate) fn unpack_dictionary<K: ArrowDictionaryKeyType>(
146 array: &DictionaryArray<K>,
147 to_type: &DataType,
148 cast_options: &CastOptions,
149) -> Result<ArrayRef, ArrowError> {
150 let cast_dict_values = cast_with_options(array.values(), to_type, cast_options)?;
151 take(cast_dict_values.as_ref(), array.keys(), None)
152}
153
154pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
156 array: &dyn Array,
157 primitive_type: DataType,
158 dict_value_type: &DataType,
159 cast_options: &CastOptions,
160) -> Result<ArrayRef, ArrowError> {
161 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
162 let dict = cast_with_options(
163 primitive.as_ref(),
164 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
165 cast_options,
166 )?;
167 cast_with_options(
168 dict.as_ref(),
169 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
170 cast_options,
171 )
172}
173
174pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
179 array: &dyn Array,
180 dict_value_type: &DataType,
181 cast_options: &CastOptions,
182) -> Result<ArrayRef, ArrowError> {
183 use DataType::*;
184
185 match *dict_value_type {
186 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
187 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
188 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
189 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
190 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
191 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
192 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
193 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
194 Decimal32(p, s) => pack_decimal_to_dictionary::<K, Decimal32Type>(
195 array,
196 dict_value_type,
197 p,
198 s,
199 cast_options,
200 ),
201 Decimal64(p, s) => pack_decimal_to_dictionary::<K, Decimal64Type>(
202 array,
203 dict_value_type,
204 p,
205 s,
206 cast_options,
207 ),
208 Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
209 array,
210 dict_value_type,
211 p,
212 s,
213 cast_options,
214 ),
215 Decimal256(p, s) => pack_decimal_to_dictionary::<K, Decimal256Type>(
216 array,
217 dict_value_type,
218 p,
219 s,
220 cast_options,
221 ),
222 Float16 => {
223 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
224 }
225 Float32 => {
226 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
227 }
228 Float64 => {
229 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
230 }
231 Date32 => pack_array_to_dictionary_via_primitive::<K>(
232 array,
233 DataType::Int32,
234 dict_value_type,
235 cast_options,
236 ),
237 Date64 => pack_array_to_dictionary_via_primitive::<K>(
238 array,
239 DataType::Int64,
240 dict_value_type,
241 cast_options,
242 ),
243 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
244 array,
245 DataType::Int32,
246 dict_value_type,
247 cast_options,
248 ),
249 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
250 array,
251 DataType::Int64,
252 dict_value_type,
253 cast_options,
254 ),
255 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
256 array,
257 DataType::Int64,
258 dict_value_type,
259 cast_options,
260 ),
261 Utf8 => {
262 if array.data_type() == &DataType::Utf8View {
264 return string_view_to_dictionary::<K, i32>(array);
265 }
266 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
267 }
268 LargeUtf8 => {
269 if array.data_type() == &DataType::Utf8View {
271 return string_view_to_dictionary::<K, i64>(array);
272 }
273 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
274 }
275 Binary => {
276 if array.data_type() == &DataType::BinaryView {
278 return binary_view_to_dictionary::<K, i32>(array);
279 }
280 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
281 }
282 LargeBinary => {
283 if array.data_type() == &DataType::BinaryView {
285 return binary_view_to_dictionary::<K, i64>(array);
286 }
287 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
288 }
289 FixedSizeBinary(byte_size) => {
290 pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
291 }
292 _ => Err(ArrowError::CastError(format!(
293 "Unsupported output type for dictionary packing: {dict_value_type}"
294 ))),
295 }
296}
297
298pub(crate) fn pack_numeric_to_dictionary<K, V>(
301 array: &dyn Array,
302 dict_value_type: &DataType,
303 cast_options: &CastOptions,
304) -> Result<ArrayRef, ArrowError>
305where
306 K: ArrowDictionaryKeyType,
307 V: ArrowPrimitiveType,
308{
309 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
311 let values = cast_values.as_primitive::<V>();
312
313 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
314
315 for i in 0..values.len() {
317 if values.is_null(i) {
318 b.append_null();
319 } else {
320 b.append(values.value(i))?;
321 }
322 }
323 Ok(Arc::new(b.finish()))
324}
325
326pub(crate) fn pack_decimal_to_dictionary<K, D>(
327 array: &dyn Array,
328 dict_value_type: &DataType,
329 precision: u8,
330 scale: i8,
331 cast_options: &CastOptions,
332) -> Result<ArrayRef, ArrowError>
333where
334 K: ArrowDictionaryKeyType,
335 D: DecimalType + ArrowPrimitiveType,
336{
337 let dict = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
338 let dict = dict
339 .as_dictionary::<K>()
340 .downcast_dict::<PrimitiveArray<D>>()
341 .ok_or_else(|| {
342 ArrowError::ComputeError(format!(
343 "Internal Error: Cannot cast dict to {}Array",
344 D::PREFIX
345 ))
346 })?;
347 let value = dict.values().clone();
348 let value = value.with_precision_and_scale(precision, scale)?;
350 Ok(Arc::new(DictionaryArray::<K>::try_new(
351 dict.keys().clone(),
352 Arc::new(value),
353 )?))
354}
355
356pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
357 array: &dyn Array,
358) -> Result<ArrayRef, ArrowError>
359where
360 K: ArrowDictionaryKeyType,
361{
362 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
363 array.len(),
364 1024,
365 1024,
366 );
367 let string_view = array
368 .as_any()
369 .downcast_ref::<StringViewArray>()
370 .ok_or_else(|| {
371 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
372 })?;
373 for v in string_view.iter() {
374 match v {
375 Some(v) => {
376 b.append(v)?;
377 }
378 None => {
379 b.append_null();
380 }
381 }
382 }
383
384 Ok(Arc::new(b.finish()))
385}
386
387pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
388 array: &dyn Array,
389) -> Result<ArrayRef, ArrowError>
390where
391 K: ArrowDictionaryKeyType,
392{
393 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
394 array.len(),
395 1024,
396 1024,
397 );
398 let binary_view = array
399 .as_any()
400 .downcast_ref::<BinaryViewArray>()
401 .ok_or_else(|| {
402 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
403 })?;
404 for v in binary_view.iter() {
405 match v {
406 Some(v) => {
407 b.append(v)?;
408 }
409 None => {
410 b.append_null();
411 }
412 }
413 }
414
415 Ok(Arc::new(b.finish()))
416}
417
418pub(crate) fn pack_byte_to_dictionary<K, T>(
421 array: &dyn Array,
422 cast_options: &CastOptions,
423) -> Result<ArrayRef, ArrowError>
424where
425 K: ArrowDictionaryKeyType,
426 T: ByteArrayType,
427{
428 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
429 let values = cast_values
430 .as_any()
431 .downcast_ref::<GenericByteArray<T>>()
432 .ok_or_else(|| {
433 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
434 })?;
435 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
436
437 for i in 0..values.len() {
439 if values.is_null(i) {
440 b.append_null();
441 } else {
442 b.append(values.value(i))?;
443 }
444 }
445 Ok(Arc::new(b.finish()))
446}
447
448pub(crate) fn pack_byte_to_fixed_size_dictionary<K>(
451 array: &dyn Array,
452 cast_options: &CastOptions,
453 byte_width: i32,
454) -> Result<ArrayRef, ArrowError>
455where
456 K: ArrowDictionaryKeyType,
457{
458 let cast_values =
459 cast_with_options(array, &DataType::FixedSizeBinary(byte_width), cast_options)?;
460 let values = cast_values
461 .as_any()
462 .downcast_ref::<FixedSizeBinaryArray>()
463 .ok_or_else(|| {
464 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
465 })?;
466 let mut b = FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(1024, 1024, byte_width);
467
468 for i in 0..values.len() {
470 if values.is_null(i) {
471 b.append_null();
472 } else {
473 b.append(values.value(i))?;
474 }
475 }
476 Ok(Arc::new(b.finish()))
477}