arrow_cast/cast/
list.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::cast::*;
19
20/// Helper function that takes a primitive array and casts to a (generic) list array.
21pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
22    array: &dyn Array,
23    to: &FieldRef,
24    cast_options: &CastOptions,
25) -> Result<ArrayRef, ArrowError> {
26    let values = cast_with_options(array, to.data_type(), cast_options)?;
27    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
28    let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
29    Ok(Arc::new(list))
30}
31
32/// Helper function that takes a primitive array and casts to a fixed size list array.
33pub(crate) fn cast_values_to_fixed_size_list(
34    array: &dyn Array,
35    to: &FieldRef,
36    size: i32,
37    cast_options: &CastOptions,
38) -> Result<ArrayRef, ArrowError> {
39    let values = cast_with_options(array, to.data_type(), cast_options)?;
40    let list = FixedSizeListArray::new(to.clone(), size, values, None);
41    Ok(Arc::new(list))
42}
43
44pub(crate) fn cast_single_element_fixed_size_list_to_values(
45    array: &dyn Array,
46    to: &DataType,
47    cast_options: &CastOptions,
48) -> Result<ArrayRef, ArrowError> {
49    let values = array.as_fixed_size_list().values();
50    cast_with_options(values, to, cast_options)
51}
52
53pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
54    array: &dyn Array,
55) -> Result<ArrayRef, ArrowError>
56where
57    OffsetSize: OffsetSizeTrait,
58{
59    let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
60    let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
61    Ok(Arc::new(list))
62}
63
64pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
65    array: &GenericListArray<OffsetSize>,
66    field: &FieldRef,
67    size: i32,
68    cast_options: &CastOptions,
69) -> Result<ArrayRef, ArrowError>
70where
71    OffsetSize: OffsetSizeTrait,
72{
73    let cap = array.len() * size as usize;
74
75    // Whether the resulting array may contain null lists
76    let nullable = cast_options.safe || array.null_count() != 0;
77    let mut nulls = nullable.then(|| {
78        let mut buffer = BooleanBufferBuilder::new(array.len());
79        match array.nulls() {
80            Some(n) => buffer.append_buffer(n.inner()),
81            None => buffer.append_n(array.len(), true),
82        }
83        buffer
84    });
85
86    // Nulls in FixedSizeListArray take up space and so we must pad the values
87    let values = array.values().to_data();
88    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
89    // The end position in values of the last incorrectly-sized list slice
90    let mut last_pos = 0;
91
92    // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases.
93    let is_prev_empty = if array.offsets().len() < 2 {
94        false
95    } else {
96        let first_offset = array.offsets()[0].as_usize();
97        let second_offset = array.offsets()[1].as_usize();
98
99        first_offset == 0 && second_offset == 0
100    };
101
102    for (idx, w) in array.offsets().windows(2).enumerate() {
103        let start_pos = w[0].as_usize();
104        let end_pos = w[1].as_usize();
105        let len = end_pos - start_pos;
106
107        if len != size as usize {
108            if cast_options.safe || array.is_null(idx) {
109                if last_pos != start_pos {
110                    // Extend with valid slices
111                    mutable.extend(0, last_pos, start_pos);
112                }
113                // Pad this slice with nulls
114                mutable.extend_nulls(size as _);
115                nulls.as_mut().unwrap().set_bit(idx, false);
116                // Set last_pos to the end of this slice's values
117                last_pos = end_pos
118            } else {
119                return Err(ArrowError::CastError(format!(
120                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
121                )));
122            }
123        }
124    }
125
126    let values = match last_pos {
127        0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length
128        _ => {
129            if mutable.len() != cap {
130                // Remaining slices were all correct length
131                let remaining = cap - mutable.len();
132                mutable.extend(0, last_pos, last_pos + remaining)
133            }
134            make_array(mutable.freeze())
135        }
136    };
137
138    // Cast the inner values if necessary
139    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
140
141    // Construct the FixedSizeListArray
142    let nulls = nulls.map(|mut x| x.finish().into());
143    let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
144    Ok(Arc::new(array))
145}
146
147/// Helper function that takes an Generic list container and casts the inner datatype.
148pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
149    array: &dyn Array,
150    to: &FieldRef,
151    cast_options: &CastOptions,
152) -> Result<ArrayRef, ArrowError> {
153    let list = array.as_list::<O>();
154    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
155    Ok(Arc::new(GenericListArray::<O>::new(
156        to.clone(),
157        list.offsets().clone(),
158        values,
159        list.nulls().cloned(),
160    )))
161}
162
163/// Cast the container type of List/Largelist array along with the inner datatype
164pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
165    array: &dyn Array,
166    field: &FieldRef,
167    cast_options: &CastOptions,
168) -> Result<ArrayRef, ArrowError> {
169    let list = array.as_list::<I>();
170    let values = list.values();
171    let offsets = list.offsets();
172    let nulls = list.nulls().cloned();
173
174    if !O::IS_LARGE && values.len() > i32::MAX as usize {
175        return Err(ArrowError::ComputeError(
176            "LargeList too large to cast to List".into(),
177        ));
178    }
179
180    // Recursively cast values
181    let values = cast_with_options(values, field.data_type(), cast_options)?;
182    let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
183
184    // Safety: valid offsets and checked for overflow
185    let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
186
187    Ok(Arc::new(GenericListArray::<O>::new(
188        field.clone(),
189        offsets,
190        values,
191        nulls,
192    )))
193}