Skip to main content

arrow_cast/cast/
list.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::cast::*;
19
20/// Converts a non-list array to a list array where every element is a single element
21/// list. `NULL`s in the original array become `[NULL]` (i.e. output list array
22/// contains no nulls since it wraps all input nulls in a single element list).
23///
24/// For example: `Int32([1, NULL, 2]) -> List<Int32>([[1], [NULL], [2]])`
25pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
26    array: &dyn Array,
27    to: &FieldRef,
28    cast_options: &CastOptions,
29) -> Result<ArrayRef, ArrowError> {
30    if array.len() > O::MAX_OFFSET {
31        return Err(ArrowError::ComputeError(format!(
32            "Offset overflow when casting from {} to {}",
33            array.data_type(),
34            to.data_type()
35        )));
36    }
37    let values = cast_with_options(array, to.data_type(), cast_options)?;
38    let offsets = OffsetBuffer::from_repeated_length(1, values.len());
39    let list = GenericListArray::<O>::try_new(to.clone(), offsets, values, None)?;
40    Ok(Arc::new(list))
41}
42
43/// Same as [`cast_values_to_list`] but output list view array.
44pub(crate) fn cast_values_to_list_view<O: OffsetSizeTrait>(
45    array: &dyn Array,
46    to: &FieldRef,
47    cast_options: &CastOptions,
48) -> Result<ArrayRef, ArrowError> {
49    if array.len() > O::MAX_OFFSET {
50        return Err(ArrowError::ComputeError(format!(
51            "Offset overflow when casting from {} to {}",
52            array.data_type(),
53            to.data_type()
54        )));
55    }
56    let values = cast_with_options(array, to.data_type(), cast_options)?;
57    let offsets = (0..values.len())
58        .map(|index| O::usize_as(index))
59        .collect::<Vec<O>>();
60    let list = GenericListViewArray::<O>::try_new(
61        to.clone(),
62        offsets.into(),
63        vec![O::one(); values.len()].into(),
64        values,
65        None,
66    )?;
67    Ok(Arc::new(list))
68}
69
70/// Cast fixed size list array to inner values type, essentially flattening the
71/// lists.
72///
73/// For example: `FixedSizeList<Int32, 2>([[1, 2], [3, 4]]) -> Int32([1, 2, 3, 4])`
74pub(crate) fn cast_single_element_fixed_size_list_to_values(
75    array: &dyn Array,
76    to: &DataType,
77    cast_options: &CastOptions,
78) -> Result<ArrayRef, ArrowError> {
79    let values = array.as_fixed_size_list().values();
80    cast_with_options(values, to, cast_options)
81}
82
83fn cast_fixed_size_list_to_list_inner<OffsetSize: OffsetSizeTrait, const IS_LIST_VIEW: bool>(
84    array: &dyn Array,
85    to: &FieldRef,
86    cast_options: &CastOptions,
87) -> Result<ArrayRef, ArrowError> {
88    let array = array.as_fixed_size_list();
89    let DataType::FixedSizeList(inner_field, size) = array.data_type() else {
90        unreachable!()
91    };
92    let array = if to != inner_field {
93        // To transform inner type, can first cast to FSL with new inner type.
94        let fsl_to = DataType::FixedSizeList(to.clone(), *size);
95        let array = cast_with_options(array, &fsl_to, cast_options)?;
96        array.as_fixed_size_list().clone()
97    } else {
98        array.clone()
99    };
100    if IS_LIST_VIEW {
101        let list: GenericListViewArray<OffsetSize> = array.into();
102        Ok(Arc::new(list))
103    } else {
104        let list: GenericListArray<OffsetSize> = array.into();
105        Ok(Arc::new(list))
106    }
107}
108
109/// Cast fixed size list arrays to list arrays, maintaining the lengths of the inner
110/// lists.
111///
112/// For example: `FixedSizeList<Int32, 2>([[1, 2], [3, 4]]) -> List<Int32>([[1, 2], [3, 4]])`
113pub(crate) fn cast_fixed_size_list_to_list<OffsetSize: OffsetSizeTrait>(
114    array: &dyn Array,
115    to: &FieldRef,
116    cast_options: &CastOptions,
117) -> Result<ArrayRef, ArrowError> {
118    cast_fixed_size_list_to_list_inner::<OffsetSize, false>(array, to, cast_options)
119}
120
121/// Same as [`cast_fixed_size_list_to_list`] but output list view array.
122pub(crate) fn cast_fixed_size_list_to_list_view<OffsetSize: OffsetSizeTrait>(
123    array: &dyn Array,
124    to: &FieldRef,
125    cast_options: &CastOptions,
126) -> Result<ArrayRef, ArrowError> {
127    cast_fixed_size_list_to_list_inner::<OffsetSize, true>(array, to, cast_options)
128}
129
130/// Cast list to fixed size list array. If any inner list size does not match the
131/// size of the output fixed size list array, depending on `cast_options` we either
132/// output `NULL` for that element (safe) or raise an error.
133pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
134    array: &dyn Array,
135    field: &FieldRef,
136    size: i32,
137    cast_options: &CastOptions,
138) -> Result<ArrayRef, ArrowError>
139where
140    OffsetSize: OffsetSizeTrait,
141{
142    let array = array.as_list::<OffsetSize>();
143
144    let cap = array.len() * size as usize;
145
146    let mut null_builder = NullBufferBuilder::new(array.len());
147    if let Some(nulls) = array.nulls().filter(|b| b.null_count() > 0) {
148        null_builder.append_buffer(nulls);
149    } else {
150        null_builder.append_n_non_nulls(array.len());
151    }
152
153    // Whether the resulting array may contain null lists
154    let nullable = cast_options.safe || array.null_count() != 0;
155    // Nulls in FixedSizeListArray take up space and so we must pad the values
156    let values = array.values().to_data();
157    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
158    // The end position in values of the last incorrectly-sized list slice
159    let mut last_pos = 0;
160
161    // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases.
162    let is_prev_empty = if array.offsets().len() < 2 {
163        false
164    } else {
165        let first_offset = array.offsets()[0].as_usize();
166        let second_offset = array.offsets()[1].as_usize();
167
168        first_offset == 0 && second_offset == 0
169    };
170
171    for (idx, w) in array.offsets().windows(2).enumerate() {
172        let start_pos = w[0].as_usize();
173        let end_pos = w[1].as_usize();
174        let len = end_pos - start_pos;
175
176        if len != size as usize {
177            if cast_options.safe || array.is_null(idx) {
178                if last_pos != start_pos {
179                    // Extend with valid slices
180                    mutable
181                        .try_extend(0, last_pos, start_pos)
182                        .map_err(|e| ArrowError::CastError(e.to_string()))?;
183                }
184                // Pad this slice with nulls
185                mutable
186                    .try_extend_nulls(size as _)
187                    .map_err(|e| ArrowError::CastError(e.to_string()))?;
188                null_builder.set_bit(idx, false);
189                // Set last_pos to the end of this slice's values
190                last_pos = end_pos
191            } else {
192                return Err(ArrowError::CastError(format!(
193                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
194                )));
195            }
196        }
197    }
198
199    let values = match last_pos {
200        0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length
201        _ => {
202            if mutable.len() != cap {
203                // Remaining slices were all correct length
204                let remaining = cap - mutable.len();
205                mutable
206                    .try_extend(0, last_pos, last_pos + remaining)
207                    .map_err(|e| ArrowError::CastError(e.to_string()))?;
208            }
209            make_array(mutable.freeze())
210        }
211    };
212
213    // Cast the inner values if necessary
214    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
215
216    let nulls = null_builder.build();
217    // Degenerate case where we may lose length information if there isn't a null
218    // buffer to infer length from
219    let array = if size == 0 && nulls.is_none() {
220        FixedSizeListArray::try_new_with_length(field.clone(), size, values, nulls, array.len())?
221    } else {
222        FixedSizeListArray::try_new(field.clone(), size, values, nulls)?
223    };
224    Ok(Arc::new(array))
225}
226
227/// Same as [`cast_list_to_fixed_size_list`] but for list view arrays.
228pub(crate) fn cast_list_view_to_fixed_size_list<O: OffsetSizeTrait>(
229    array: &dyn Array,
230    field: &FieldRef,
231    size: i32,
232    cast_options: &CastOptions,
233) -> Result<ArrayRef, ArrowError> {
234    let array = array.as_list_view::<O>();
235
236    let mut null_builder = NullBufferBuilder::new(array.len());
237    if let Some(nulls) = array.nulls().filter(|b| b.null_count() > 0) {
238        null_builder.append_buffer(nulls);
239    } else {
240        null_builder.append_n_non_nulls(array.len());
241    }
242
243    let nullable = cast_options.safe || array.null_count() != 0;
244    let values = array.values().to_data();
245    let cap = array.len() * size as usize;
246    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
247
248    for idx in 0..array.len() {
249        let offset = array.value_offset(idx).as_usize();
250        let len = array.value_size(idx).as_usize();
251
252        if len != size as usize {
253            // Nulls in FixedSizeListArray take up space and so we must pad the values
254            if cast_options.safe || array.is_null(idx) {
255                mutable
256                    .try_extend_nulls(size as _)
257                    .map_err(|e| ArrowError::CastError(e.to_string()))?;
258                null_builder.set_bit(idx, false);
259            } else {
260                return Err(ArrowError::CastError(format!(
261                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
262                )));
263            }
264        } else {
265            mutable
266                .try_extend(0, offset, offset + len)
267                .map_err(|e| ArrowError::CastError(e.to_string()))?;
268        }
269    }
270
271    let values = make_array(mutable.freeze());
272    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
273
274    let nulls = null_builder.build();
275    // Degenerate case where we may lose length information if there isn't a null
276    // buffer to infer length from
277    let array = if size == 0 && nulls.is_none() {
278        FixedSizeListArray::try_new_with_length(field.clone(), size, values, nulls, array.len())?
279    } else {
280        FixedSizeListArray::try_new(field.clone(), size, values, nulls)?
281    };
282    Ok(Arc::new(array))
283}
284
285/// Casting between list arrays of same offset size; we cast only the inner type.
286pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
287    array: &dyn Array,
288    to: &FieldRef,
289    cast_options: &CastOptions,
290) -> Result<ArrayRef, ArrowError> {
291    let list = array.as_list::<O>();
292    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
293    Ok(Arc::new(GenericListArray::<O>::try_new(
294        to.clone(),
295        list.offsets().clone(),
296        values,
297        list.nulls().cloned(),
298    )?))
299}
300
301/// Casting between list view arrays of same offset size; we cast only the inner type.
302pub(crate) fn cast_list_view_values<O: OffsetSizeTrait>(
303    array: &dyn Array,
304    to: &FieldRef,
305    cast_options: &CastOptions,
306) -> Result<ArrayRef, ArrowError> {
307    let list = array.as_list_view::<O>();
308    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
309    Ok(Arc::new(GenericListViewArray::<O>::try_new(
310        to.clone(),
311        list.offsets().clone(),
312        list.sizes().clone(),
313        values,
314        list.nulls().cloned(),
315    )?))
316}
317
318/// Casting between list arrays of different offset size (e.g. List -> LargeList)
319pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
320    array: &dyn Array,
321    field: &FieldRef,
322    cast_options: &CastOptions,
323) -> Result<ArrayRef, ArrowError> {
324    let list = array.as_list::<I>();
325    let values = list.values();
326    let offsets = list.offsets();
327    let nulls = list.nulls().cloned();
328
329    if offsets.last().unwrap().as_usize() > O::MAX_OFFSET {
330        return Err(ArrowError::ComputeError(format!(
331            "Offset overflow when casting from {} to {}",
332            array.data_type(),
333            field.data_type()
334        )));
335    }
336
337    // Recursively cast values
338    let values = cast_with_options(values, field.data_type(), cast_options)?;
339    let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
340
341    // Safety: valid offsets and checked for overflow
342    let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
343
344    Ok(Arc::new(GenericListArray::<O>::try_new(
345        field.clone(),
346        offsets,
347        values,
348        nulls,
349    )?))
350}
351
352/// Casting list view arrays to list.
353pub(crate) fn cast_list_view_to_list<I, O>(
354    array: &dyn Array,
355    to: &FieldRef,
356    cast_options: &CastOptions,
357) -> Result<ArrayRef, ArrowError>
358where
359    I: OffsetSizeTrait,
360    // We need ArrowPrimitiveType here to be able to create indices array for the
361    // take kernel.
362    O: ArrowPrimitiveType,
363    O::Native: OffsetSizeTrait,
364{
365    let list_view = array.as_list_view::<I>();
366    let list_view_offsets = list_view.offsets();
367    let sizes = list_view.sizes();
368
369    let mut take_indices: Vec<O::Native> = Vec::with_capacity(list_view.values().len());
370    let mut offsets: Vec<O::Native> = Vec::with_capacity(list_view.len() + 1);
371    use num_traits::Zero;
372    offsets.push(O::Native::zero());
373
374    for i in 0..list_view.len() {
375        if list_view.is_null(i) {
376            offsets.push(O::Native::usize_as(take_indices.len()));
377            continue;
378        }
379
380        let offset = list_view_offsets[i].as_usize();
381        let size = sizes[i].as_usize();
382
383        for value_index in offset..offset + size {
384            take_indices.push(O::Native::usize_as(value_index));
385        }
386
387        // Must guard all cases since ListView<i32> can overflow List<i32>
388        // e.g. if offsets of [0, 0, 0] and sizes [i32::MAX, i32::MAX, i32::MAX]
389        if take_indices.len() > O::Native::MAX_OFFSET {
390            return Err(ArrowError::ComputeError(format!(
391                "Offset overflow when casting from {} to {}",
392                array.data_type(),
393                to.data_type()
394            )));
395        }
396        offsets.push(O::Native::usize_as(take_indices.len()));
397    }
398
399    // Form a contiguous values array
400    let take_indices = PrimitiveArray::<O>::from_iter_values(take_indices);
401    let values = arrow_select::take::take(list_view.values(), &take_indices, None)?;
402    let values = cast_with_options(&values, to.data_type(), cast_options)?;
403
404    Ok(Arc::new(GenericListArray::<O::Native>::try_new(
405        to.clone(),
406        OffsetBuffer::new(offsets.into()),
407        values,
408        list_view.nulls().cloned(),
409    )?))
410}
411
412/// Casting between list view arrays of different offset size (e.g. ListView -> LargeListView)
413pub(crate) fn cast_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
414    array: &dyn Array,
415    to_field: &FieldRef,
416    cast_options: &CastOptions,
417) -> Result<ArrayRef, ArrowError> {
418    let list_view = array.as_list_view::<I>();
419
420    // Recursively cast values
421    let values = cast_with_options(list_view.values(), to_field.data_type(), cast_options)?;
422
423    let offsets = list_view
424        .offsets()
425        .iter()
426        .map(|offset| {
427            let offset = offset.as_usize();
428            if offset > O::MAX_OFFSET {
429                return Err(ArrowError::ComputeError(format!(
430                    "Offset overflow when casting from {} to {}",
431                    array.data_type(),
432                    to_field.data_type()
433                )));
434            }
435            Ok(O::usize_as(offset))
436        })
437        .collect::<Result<Vec<O>, _>>()?;
438    let sizes = list_view
439        .sizes()
440        .iter()
441        .map(|size| {
442            let size = size.as_usize();
443            if size > O::MAX_OFFSET {
444                return Err(ArrowError::ComputeError(format!(
445                    "Offset overflow when casting from {} to {}",
446                    array.data_type(),
447                    to_field.data_type()
448                )));
449            }
450            Ok(O::usize_as(size))
451        })
452        .collect::<Result<Vec<O>, _>>()?;
453    Ok(Arc::new(GenericListViewArray::<O>::try_new(
454        to_field.clone(),
455        offsets.into(),
456        sizes.into(),
457        values,
458        list_view.nulls().cloned(),
459    )?))
460}
461
462/// Casting list arrays to list view.
463pub(crate) fn cast_list_to_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
464    array: &dyn Array,
465    to_field: &FieldRef,
466    cast_options: &CastOptions,
467) -> Result<ArrayRef, ArrowError> {
468    let list = array.as_list::<I>();
469    let (_field, offsets, values, nulls) = list.clone().into_parts();
470
471    let len = offsets.len() - 1;
472    let mut sizes = Vec::with_capacity(len);
473    let mut view_offsets = Vec::with_capacity(len);
474    for (i, offset) in offsets.iter().enumerate().take(len) {
475        let offset = offset.as_usize();
476        let size = offsets[i + 1].as_usize() - offset;
477
478        if offset > O::MAX_OFFSET || size > O::MAX_OFFSET {
479            return Err(ArrowError::ComputeError(format!(
480                "Offset overflow when casting from {} to {}",
481                array.data_type(),
482                to_field.data_type()
483            )));
484        }
485
486        view_offsets.push(O::usize_as(offset));
487        sizes.push(O::usize_as(size));
488    }
489    let values = cast_with_options(&values, to_field.data_type(), cast_options)?;
490    let array = GenericListViewArray::<O>::new(
491        to_field.clone(),
492        view_offsets.into(),
493        sizes.into(),
494        values,
495        nulls,
496    );
497    Ok(Arc::new(array))
498}