Skip to main content

arrow_cast/cast/
list.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::cast::*;
19
20/// Converts a non-list array to a list array where every element is a single element
21/// list. `NULL`s in the original array become `[NULL]` (i.e. output list array
22/// contains no nulls since it wraps all input nulls in a single element list).
23///
24/// For example: `Int32([1, NULL, 2]) -> List<Int32>([[1], [NULL], [2]])`
25pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
26    array: &dyn Array,
27    to: &FieldRef,
28    cast_options: &CastOptions,
29) -> Result<ArrayRef, ArrowError> {
30    if array.len() > O::MAX_OFFSET {
31        return Err(ArrowError::ComputeError(format!(
32            "Offset overflow when casting from {} to {}",
33            array.data_type(),
34            to.data_type()
35        )));
36    }
37    let values = cast_with_options(array, to.data_type(), cast_options)?;
38    let offsets = OffsetBuffer::from_repeated_length(1, values.len());
39    let list = GenericListArray::<O>::try_new(to.clone(), offsets, values, None)?;
40    Ok(Arc::new(list))
41}
42
43/// Same as [`cast_values_to_list`] but output list view array.
44pub(crate) fn cast_values_to_list_view<O: OffsetSizeTrait>(
45    array: &dyn Array,
46    to: &FieldRef,
47    cast_options: &CastOptions,
48) -> Result<ArrayRef, ArrowError> {
49    if array.len() > O::MAX_OFFSET {
50        return Err(ArrowError::ComputeError(format!(
51            "Offset overflow when casting from {} to {}",
52            array.data_type(),
53            to.data_type()
54        )));
55    }
56    let values = cast_with_options(array, to.data_type(), cast_options)?;
57    let offsets = (0..values.len())
58        .map(|index| O::usize_as(index))
59        .collect::<Vec<O>>();
60    let list = GenericListViewArray::<O>::try_new(
61        to.clone(),
62        offsets.into(),
63        vec![O::one(); values.len()].into(),
64        values,
65        None,
66    )?;
67    Ok(Arc::new(list))
68}
69
70/// Same as [`cast_values_to_list`] but output fixed size list array with element
71/// size 1.
72pub(crate) fn cast_values_to_fixed_size_list(
73    array: &dyn Array,
74    to: &FieldRef,
75    size: i32,
76    cast_options: &CastOptions,
77) -> Result<ArrayRef, ArrowError> {
78    let values = cast_with_options(array, to.data_type(), cast_options)?;
79    let list = FixedSizeListArray::try_new(to.clone(), size, values, None)?;
80    Ok(Arc::new(list))
81}
82
83/// Cast fixed size list array to inner values type, essentially flattening the
84/// lists.
85///
86/// For example: `FixedSizeList<Int32, 2>([[1, 2], [3, 4]]) -> Int32([1, 2, 3, 4])`
87pub(crate) fn cast_single_element_fixed_size_list_to_values(
88    array: &dyn Array,
89    to: &DataType,
90    cast_options: &CastOptions,
91) -> Result<ArrayRef, ArrowError> {
92    let values = array.as_fixed_size_list().values();
93    cast_with_options(values, to, cast_options)
94}
95
96fn cast_fixed_size_list_to_list_inner<OffsetSize: OffsetSizeTrait, const IS_LIST_VIEW: bool>(
97    array: &dyn Array,
98    to: &FieldRef,
99    cast_options: &CastOptions,
100) -> Result<ArrayRef, ArrowError> {
101    let array = array.as_fixed_size_list();
102    let DataType::FixedSizeList(inner_field, size) = array.data_type() else {
103        unreachable!()
104    };
105    let array = if to.data_type() != inner_field.data_type() {
106        // To transform inner type, can first cast to FSL with new inner type.
107        let fsl_to = DataType::FixedSizeList(to.clone(), *size);
108        let array = cast_with_options(array, &fsl_to, cast_options)?;
109        array.as_fixed_size_list().clone()
110    } else {
111        array.clone()
112    };
113    if IS_LIST_VIEW {
114        let list: GenericListViewArray<OffsetSize> = array.into();
115        Ok(Arc::new(list))
116    } else {
117        let list: GenericListArray<OffsetSize> = array.into();
118        Ok(Arc::new(list))
119    }
120}
121
122/// Cast fixed size list arrays to list arrays, maintaining the lengths of the inner
123/// lists.
124///
125/// For example: `FixedSizeList<Int32, 2>([[1, 2], [3, 4]]) -> List<Int32>([[1, 2], [3, 4]])`
126pub(crate) fn cast_fixed_size_list_to_list<OffsetSize: OffsetSizeTrait>(
127    array: &dyn Array,
128    to: &FieldRef,
129    cast_options: &CastOptions,
130) -> Result<ArrayRef, ArrowError> {
131    cast_fixed_size_list_to_list_inner::<OffsetSize, false>(array, to, cast_options)
132}
133
134/// Same as [`cast_fixed_size_list_to_list`] but output list view array.
135pub(crate) fn cast_fixed_size_list_to_list_view<OffsetSize: OffsetSizeTrait>(
136    array: &dyn Array,
137    to: &FieldRef,
138    cast_options: &CastOptions,
139) -> Result<ArrayRef, ArrowError> {
140    cast_fixed_size_list_to_list_inner::<OffsetSize, true>(array, to, cast_options)
141}
142
143/// Cast list to fixed size list array. If any inner list size does not match the
144/// size of the output fixed size list array, depending on `cast_options` we either
145/// output `NULL` for that element (safe) or raise an error.
146pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
147    array: &dyn Array,
148    field: &FieldRef,
149    size: i32,
150    cast_options: &CastOptions,
151) -> Result<ArrayRef, ArrowError>
152where
153    OffsetSize: OffsetSizeTrait,
154{
155    let array = array.as_list::<OffsetSize>();
156
157    let cap = array.len() * size as usize;
158
159    let mut null_builder = NullBufferBuilder::new(array.len());
160    if let Some(nulls) = array.nulls().filter(|b| b.null_count() > 0) {
161        null_builder.append_buffer(nulls);
162    } else {
163        null_builder.append_n_non_nulls(array.len());
164    }
165
166    // Whether the resulting array may contain null lists
167    let nullable = cast_options.safe || array.null_count() != 0;
168    // Nulls in FixedSizeListArray take up space and so we must pad the values
169    let values = array.values().to_data();
170    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
171    // The end position in values of the last incorrectly-sized list slice
172    let mut last_pos = 0;
173
174    // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases.
175    let is_prev_empty = if array.offsets().len() < 2 {
176        false
177    } else {
178        let first_offset = array.offsets()[0].as_usize();
179        let second_offset = array.offsets()[1].as_usize();
180
181        first_offset == 0 && second_offset == 0
182    };
183
184    for (idx, w) in array.offsets().windows(2).enumerate() {
185        let start_pos = w[0].as_usize();
186        let end_pos = w[1].as_usize();
187        let len = end_pos - start_pos;
188
189        if len != size as usize {
190            if cast_options.safe || array.is_null(idx) {
191                if last_pos != start_pos {
192                    // Extend with valid slices
193                    mutable.extend(0, last_pos, start_pos);
194                }
195                // Pad this slice with nulls
196                mutable.extend_nulls(size as _);
197                null_builder.set_bit(idx, false);
198                // Set last_pos to the end of this slice's values
199                last_pos = end_pos
200            } else {
201                return Err(ArrowError::CastError(format!(
202                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
203                )));
204            }
205        }
206    }
207
208    let values = match last_pos {
209        0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length
210        _ => {
211            if mutable.len() != cap {
212                // Remaining slices were all correct length
213                let remaining = cap - mutable.len();
214                mutable.extend(0, last_pos, last_pos + remaining)
215            }
216            make_array(mutable.freeze())
217        }
218    };
219
220    // Cast the inner values if necessary
221    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
222
223    let array = FixedSizeListArray::try_new(field.clone(), size, values, null_builder.build())?;
224    Ok(Arc::new(array))
225}
226
227/// Same as [`cast_list_to_fixed_size_list`] but for list view arrays.
228pub(crate) fn cast_list_view_to_fixed_size_list<O: OffsetSizeTrait>(
229    array: &dyn Array,
230    field: &FieldRef,
231    size: i32,
232    cast_options: &CastOptions,
233) -> Result<ArrayRef, ArrowError> {
234    let array = array.as_list_view::<O>();
235
236    let mut null_builder = NullBufferBuilder::new(array.len());
237    if let Some(nulls) = array.nulls().filter(|b| b.null_count() > 0) {
238        null_builder.append_buffer(nulls);
239    } else {
240        null_builder.append_n_non_nulls(array.len());
241    }
242
243    let nullable = cast_options.safe || array.null_count() != 0;
244    let values = array.values().to_data();
245    let cap = array.len() * size as usize;
246    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
247
248    for idx in 0..array.len() {
249        let offset = array.value_offset(idx).as_usize();
250        let len = array.value_size(idx).as_usize();
251
252        if len != size as usize {
253            // Nulls in FixedSizeListArray take up space and so we must pad the values
254            if cast_options.safe || array.is_null(idx) {
255                mutable.extend_nulls(size as _);
256                null_builder.set_bit(idx, false);
257            } else {
258                return Err(ArrowError::CastError(format!(
259                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
260                )));
261            }
262        } else {
263            mutable.extend(0, offset, offset + len);
264        }
265    }
266
267    let values = make_array(mutable.freeze());
268    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
269
270    let array = FixedSizeListArray::try_new(field.clone(), size, values, null_builder.build())?;
271    Ok(Arc::new(array))
272}
273
274/// Casting between list arrays of same offset size; we cast only the inner type.
275pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
276    array: &dyn Array,
277    to: &FieldRef,
278    cast_options: &CastOptions,
279) -> Result<ArrayRef, ArrowError> {
280    let list = array.as_list::<O>();
281    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
282    Ok(Arc::new(GenericListArray::<O>::try_new(
283        to.clone(),
284        list.offsets().clone(),
285        values,
286        list.nulls().cloned(),
287    )?))
288}
289
290/// Casting between list view arrays of same offset size; we cast only the inner type.
291pub(crate) fn cast_list_view_values<O: OffsetSizeTrait>(
292    array: &dyn Array,
293    to: &FieldRef,
294    cast_options: &CastOptions,
295) -> Result<ArrayRef, ArrowError> {
296    let list = array.as_list_view::<O>();
297    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
298    Ok(Arc::new(GenericListViewArray::<O>::try_new(
299        to.clone(),
300        list.offsets().clone(),
301        list.sizes().clone(),
302        values,
303        list.nulls().cloned(),
304    )?))
305}
306
307/// Casting between list arrays of different offset size (e.g. List -> LargeList)
308pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
309    array: &dyn Array,
310    field: &FieldRef,
311    cast_options: &CastOptions,
312) -> Result<ArrayRef, ArrowError> {
313    let list = array.as_list::<I>();
314    let values = list.values();
315    let offsets = list.offsets();
316    let nulls = list.nulls().cloned();
317
318    if offsets.last().unwrap().as_usize() > O::MAX_OFFSET {
319        return Err(ArrowError::ComputeError(format!(
320            "Offset overflow when casting from {} to {}",
321            array.data_type(),
322            field.data_type()
323        )));
324    }
325
326    // Recursively cast values
327    let values = cast_with_options(values, field.data_type(), cast_options)?;
328    let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
329
330    // Safety: valid offsets and checked for overflow
331    let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
332
333    Ok(Arc::new(GenericListArray::<O>::try_new(
334        field.clone(),
335        offsets,
336        values,
337        nulls,
338    )?))
339}
340
341/// Casting list view arrays to list.
342pub(crate) fn cast_list_view_to_list<I, O>(
343    array: &dyn Array,
344    to: &FieldRef,
345    cast_options: &CastOptions,
346) -> Result<ArrayRef, ArrowError>
347where
348    I: OffsetSizeTrait,
349    // We need ArrowPrimitiveType here to be able to create indices array for the
350    // take kernel.
351    O: ArrowPrimitiveType,
352    O::Native: OffsetSizeTrait,
353{
354    let list_view = array.as_list_view::<I>();
355    let list_view_offsets = list_view.offsets();
356    let sizes = list_view.sizes();
357
358    let mut take_indices: Vec<O::Native> = Vec::with_capacity(list_view.values().len());
359    let mut offsets: Vec<O::Native> = Vec::with_capacity(list_view.len() + 1);
360    use num_traits::Zero;
361    offsets.push(O::Native::zero());
362
363    for i in 0..list_view.len() {
364        if list_view.is_null(i) {
365            offsets.push(O::Native::usize_as(take_indices.len()));
366            continue;
367        }
368
369        let offset = list_view_offsets[i].as_usize();
370        let size = sizes[i].as_usize();
371
372        for value_index in offset..offset + size {
373            take_indices.push(O::Native::usize_as(value_index));
374        }
375
376        // Must guard all cases since ListView<i32> can overflow List<i32>
377        // e.g. if offsets of [0, 0, 0] and sizes [i32::MAX, i32::MAX, i32::MAX]
378        if take_indices.len() > O::Native::MAX_OFFSET {
379            return Err(ArrowError::ComputeError(format!(
380                "Offset overflow when casting from {} to {}",
381                array.data_type(),
382                to.data_type()
383            )));
384        }
385        offsets.push(O::Native::usize_as(take_indices.len()));
386    }
387
388    // Form a contiguous values array
389    let take_indices = PrimitiveArray::<O>::from_iter_values(take_indices);
390    let values = arrow_select::take::take(list_view.values(), &take_indices, None)?;
391    let values = cast_with_options(&values, to.data_type(), cast_options)?;
392
393    Ok(Arc::new(GenericListArray::<O::Native>::try_new(
394        to.clone(),
395        OffsetBuffer::new(offsets.into()),
396        values,
397        list_view.nulls().cloned(),
398    )?))
399}
400
401/// Casting between list view arrays of different offset size (e.g. ListView -> LargeListView)
402pub(crate) fn cast_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
403    array: &dyn Array,
404    to_field: &FieldRef,
405    cast_options: &CastOptions,
406) -> Result<ArrayRef, ArrowError> {
407    let list_view = array.as_list_view::<I>();
408
409    // Recursively cast values
410    let values = cast_with_options(list_view.values(), to_field.data_type(), cast_options)?;
411
412    let offsets = list_view
413        .offsets()
414        .iter()
415        .map(|offset| {
416            let offset = offset.as_usize();
417            if offset > O::MAX_OFFSET {
418                return Err(ArrowError::ComputeError(format!(
419                    "Offset overflow when casting from {} to {}",
420                    array.data_type(),
421                    to_field.data_type()
422                )));
423            }
424            Ok(O::usize_as(offset))
425        })
426        .collect::<Result<Vec<O>, _>>()?;
427    let sizes = list_view
428        .sizes()
429        .iter()
430        .map(|size| {
431            let size = size.as_usize();
432            if size > O::MAX_OFFSET {
433                return Err(ArrowError::ComputeError(format!(
434                    "Offset overflow when casting from {} to {}",
435                    array.data_type(),
436                    to_field.data_type()
437                )));
438            }
439            Ok(O::usize_as(size))
440        })
441        .collect::<Result<Vec<O>, _>>()?;
442    Ok(Arc::new(GenericListViewArray::<O>::try_new(
443        to_field.clone(),
444        offsets.into(),
445        sizes.into(),
446        values,
447        list_view.nulls().cloned(),
448    )?))
449}
450
451/// Casting list arrays to list view.
452pub(crate) fn cast_list_to_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
453    array: &dyn Array,
454    to_field: &FieldRef,
455    cast_options: &CastOptions,
456) -> Result<ArrayRef, ArrowError> {
457    let list = array.as_list::<I>();
458    let (_field, offsets, values, nulls) = list.clone().into_parts();
459
460    let len = offsets.len() - 1;
461    let mut sizes = Vec::with_capacity(len);
462    let mut view_offsets = Vec::with_capacity(len);
463    for (i, offset) in offsets.iter().enumerate().take(len) {
464        let offset = offset.as_usize();
465        let size = offsets[i + 1].as_usize() - offset;
466
467        if offset > O::MAX_OFFSET || size > O::MAX_OFFSET {
468            return Err(ArrowError::ComputeError(format!(
469                "Offset overflow when casting from {} to {}",
470                array.data_type(),
471                to_field.data_type()
472            )));
473        }
474
475        view_offsets.push(O::usize_as(offset));
476        sizes.push(O::usize_as(size));
477    }
478    let values = cast_with_options(&values, to_field.data_type(), cast_options)?;
479    let array = GenericListViewArray::<O>::new(
480        to_field.clone(),
481        view_offsets.into(),
482        sizes.into(),
483        values,
484        nulls,
485    );
486    Ok(Arc::new(array))
487}