1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 match to_type {
32 Dictionary(to_index_type, to_value_type) => {
33 let dict_array = array
34 .as_any()
35 .downcast_ref::<DictionaryArray<K>>()
36 .ok_or_else(|| {
37 ArrowError::ComputeError(
38 "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
39 )
40 })?;
41
42 let keys_array: ArrayRef =
43 Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
44 let values_array = dict_array.values();
45 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
46 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
47
48 if cast_keys.null_count() > keys_array.null_count() {
51 return Err(ArrowError::ComputeError(format!(
52 "Could not convert {} dictionary indexes from {:?} to {:?}",
53 cast_keys.null_count() - keys_array.null_count(),
54 keys_array.data_type(),
55 to_index_type
56 )));
57 }
58
59 let data = cast_keys.into_data();
60 let builder = data
61 .into_builder()
62 .data_type(to_type.clone())
63 .child_data(vec![cast_values.into_data()]);
64
65 let data = unsafe { builder.build_unchecked() };
68
69 let new_array: ArrayRef = match **to_index_type {
71 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
72 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
73 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
74 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
75 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
76 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
77 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
78 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
79 _ => {
80 return Err(ArrowError::CastError(format!(
81 "Unsupported type {to_index_type:?} for dictionary index"
82 )));
83 }
84 };
85
86 Ok(new_array)
87 }
88 Utf8View => {
89 let dict_array = array
92 .as_dictionary::<K>()
93 .downcast_dict::<StringArray>()
94 .ok_or_else(|| {
95 ArrowError::ComputeError(
96 "Internal Error: Cannot cast Utf8View to StringArray of expected type"
97 .to_string(),
98 )
99 })?;
100
101 let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
102 dict_array.values(),
103 dict_array.keys(),
104 )?;
105 Ok(Arc::new(string_view))
106 }
107 BinaryView => {
108 let dict_array = array
111 .as_dictionary::<K>()
112 .downcast_dict::<BinaryArray>()
113 .ok_or_else(|| {
114 ArrowError::ComputeError(
115 "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
116 .to_string(),
117 )
118 })?;
119
120 let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
121 dict_array.values(),
122 dict_array.keys(),
123 )?;
124 Ok(Arc::new(binary_view))
125 }
126 _ => unpack_dictionary::<K>(array, to_type, cast_options),
127 }
128}
129
130fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
131 array: &GenericByteArray<V>,
132 keys: &PrimitiveArray<K>,
133) -> Result<GenericByteViewArray<T>, ArrowError> {
134 let value_buffer = array.values();
135 let value_offsets = array.value_offsets();
136 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
137 builder.append_block(value_buffer.clone());
138 for i in keys.iter() {
139 match i {
140 Some(v) => {
141 let idx = v.to_usize().ok_or_else(|| {
142 ArrowError::ComputeError("Invalid dictionary index".to_string())
143 })?;
144
145 unsafe {
149 let offset = value_offsets.get_unchecked(idx).as_usize();
150 let end = value_offsets.get_unchecked(idx + 1).as_usize();
151 let length = end - offset;
152 builder.append_view_unchecked(0, offset as u32, length as u32)
153 }
154 }
155 None => {
156 builder.append_null();
157 }
158 }
159 }
160 Ok(builder.finish())
161}
162
163pub(crate) fn unpack_dictionary<K>(
165 array: &dyn Array,
166 to_type: &DataType,
167 cast_options: &CastOptions,
168) -> Result<ArrayRef, ArrowError>
169where
170 K: ArrowDictionaryKeyType,
171{
172 let dict_array = array.as_dictionary::<K>();
173 let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
174 take(cast_dict_values.as_ref(), dict_array.keys(), None)
175}
176
177pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
179 array: &dyn Array,
180 primitive_type: DataType,
181 dict_value_type: &DataType,
182 cast_options: &CastOptions,
183) -> Result<ArrayRef, ArrowError> {
184 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
185 let dict = cast_with_options(
186 primitive.as_ref(),
187 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
188 cast_options,
189 )?;
190 cast_with_options(
191 dict.as_ref(),
192 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
193 cast_options,
194 )
195}
196
197pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
202 array: &dyn Array,
203 dict_value_type: &DataType,
204 cast_options: &CastOptions,
205) -> Result<ArrayRef, ArrowError> {
206 use DataType::*;
207
208 match *dict_value_type {
209 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
210 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
211 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
212 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
213 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
214 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
215 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
216 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
217 Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
218 array,
219 dict_value_type,
220 p,
221 s,
222 cast_options,
223 ),
224 Decimal256(p, s) => pack_decimal_to_dictionary::<K, Decimal256Type>(
225 array,
226 dict_value_type,
227 p,
228 s,
229 cast_options,
230 ),
231 Float16 => {
232 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
233 }
234 Float32 => {
235 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
236 }
237 Float64 => {
238 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
239 }
240 Date32 => pack_array_to_dictionary_via_primitive::<K>(
241 array,
242 DataType::Int32,
243 dict_value_type,
244 cast_options,
245 ),
246 Date64 => pack_array_to_dictionary_via_primitive::<K>(
247 array,
248 DataType::Int64,
249 dict_value_type,
250 cast_options,
251 ),
252 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
253 array,
254 DataType::Int32,
255 dict_value_type,
256 cast_options,
257 ),
258 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
259 array,
260 DataType::Int64,
261 dict_value_type,
262 cast_options,
263 ),
264 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
265 array,
266 DataType::Int64,
267 dict_value_type,
268 cast_options,
269 ),
270 Utf8 => {
271 if array.data_type() == &DataType::Utf8View {
273 return string_view_to_dictionary::<K, i32>(array);
274 }
275 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
276 }
277 LargeUtf8 => {
278 if array.data_type() == &DataType::Utf8View {
280 return string_view_to_dictionary::<K, i64>(array);
281 }
282 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
283 }
284 Binary => {
285 if array.data_type() == &DataType::BinaryView {
287 return binary_view_to_dictionary::<K, i32>(array);
288 }
289 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
290 }
291 LargeBinary => {
292 if array.data_type() == &DataType::BinaryView {
294 return binary_view_to_dictionary::<K, i64>(array);
295 }
296 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
297 }
298 FixedSizeBinary(byte_size) => {
299 pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
300 }
301 _ => Err(ArrowError::CastError(format!(
302 "Unsupported output type for dictionary packing: {dict_value_type:?}"
303 ))),
304 }
305}
306
307pub(crate) fn pack_numeric_to_dictionary<K, V>(
310 array: &dyn Array,
311 dict_value_type: &DataType,
312 cast_options: &CastOptions,
313) -> Result<ArrayRef, ArrowError>
314where
315 K: ArrowDictionaryKeyType,
316 V: ArrowPrimitiveType,
317{
318 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
320 let values = cast_values.as_primitive::<V>();
321
322 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
323
324 for i in 0..values.len() {
326 if values.is_null(i) {
327 b.append_null();
328 } else {
329 b.append(values.value(i))?;
330 }
331 }
332 Ok(Arc::new(b.finish()))
333}
334
335pub(crate) fn pack_decimal_to_dictionary<K, D>(
336 array: &dyn Array,
337 dict_value_type: &DataType,
338 precision: u8,
339 scale: i8,
340 cast_options: &CastOptions,
341) -> Result<ArrayRef, ArrowError>
342where
343 K: ArrowDictionaryKeyType,
344 D: DecimalType + ArrowPrimitiveType,
345{
346 let dict = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
347 let dict = dict
348 .as_dictionary::<K>()
349 .downcast_dict::<PrimitiveArray<D>>()
350 .ok_or_else(|| {
351 ArrowError::ComputeError(format!(
352 "Internal Error: Cannot cast dict to {}Array",
353 D::PREFIX
354 ))
355 })?;
356 let value = dict.values().clone();
357 let value = value.with_precision_and_scale(precision, scale)?;
359 Ok(Arc::new(DictionaryArray::<K>::try_new(
360 dict.keys().clone(),
361 Arc::new(value),
362 )?))
363}
364
365pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
366 array: &dyn Array,
367) -> Result<ArrayRef, ArrowError>
368where
369 K: ArrowDictionaryKeyType,
370{
371 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
372 array.len(),
373 1024,
374 1024,
375 );
376 let string_view = array
377 .as_any()
378 .downcast_ref::<StringViewArray>()
379 .ok_or_else(|| {
380 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
381 })?;
382 for v in string_view.iter() {
383 match v {
384 Some(v) => {
385 b.append(v)?;
386 }
387 None => {
388 b.append_null();
389 }
390 }
391 }
392
393 Ok(Arc::new(b.finish()))
394}
395
396pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
397 array: &dyn Array,
398) -> Result<ArrayRef, ArrowError>
399where
400 K: ArrowDictionaryKeyType,
401{
402 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
403 array.len(),
404 1024,
405 1024,
406 );
407 let binary_view = array
408 .as_any()
409 .downcast_ref::<BinaryViewArray>()
410 .ok_or_else(|| {
411 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
412 })?;
413 for v in binary_view.iter() {
414 match v {
415 Some(v) => {
416 b.append(v)?;
417 }
418 None => {
419 b.append_null();
420 }
421 }
422 }
423
424 Ok(Arc::new(b.finish()))
425}
426
427pub(crate) fn pack_byte_to_dictionary<K, T>(
430 array: &dyn Array,
431 cast_options: &CastOptions,
432) -> Result<ArrayRef, ArrowError>
433where
434 K: ArrowDictionaryKeyType,
435 T: ByteArrayType,
436{
437 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
438 let values = cast_values
439 .as_any()
440 .downcast_ref::<GenericByteArray<T>>()
441 .ok_or_else(|| {
442 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
443 })?;
444 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
445
446 for i in 0..values.len() {
448 if values.is_null(i) {
449 b.append_null();
450 } else {
451 b.append(values.value(i))?;
452 }
453 }
454 Ok(Arc::new(b.finish()))
455}
456
457pub(crate) fn pack_byte_to_fixed_size_dictionary<K>(
460 array: &dyn Array,
461 cast_options: &CastOptions,
462 byte_width: i32,
463) -> Result<ArrayRef, ArrowError>
464where
465 K: ArrowDictionaryKeyType,
466{
467 let cast_values =
468 cast_with_options(array, &DataType::FixedSizeBinary(byte_width), cast_options)?;
469 let values = cast_values
470 .as_any()
471 .downcast_ref::<FixedSizeBinaryArray>()
472 .ok_or_else(|| {
473 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
474 })?;
475 let mut b = FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(1024, 1024, byte_width);
476
477 for i in 0..values.len() {
479 if values.is_null(i) {
480 b.append_null();
481 } else {
482 b.append(values.value(i))?;
483 }
484 }
485 Ok(Arc::new(b.finish()))
486}