arrow_array/array/
list_view_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_buffer::{NullBuffer, ScalarBuffer};
19use arrow_data::{ArrayData, ArrayDataBuilder};
20use arrow_schema::{ArrowError, DataType, FieldRef};
21use std::any::Any;
22use std::ops::Add;
23use std::sync::Arc;
24
25use crate::array::{make_array, print_long_array};
26use crate::iterator::GenericListViewArrayIter;
27use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait, new_empty_array};
28
29/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
30pub type ListViewArray = GenericListViewArray<i32>;
31
32/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i64`.
33pub type LargeListViewArray = GenericListViewArray<i64>;
34
35/// An array of [variable length lists], specifically in the [list-view layout].
36///
37/// Differs from [`GenericListArray`] (which represents the [list layout]) in that
38/// the sizes of the child arrays are explicitly encoded in a separate buffer, instead
39/// of being derived from the difference between subsequent offsets in the offset buffer.
40///
41/// This allows the offsets (and subsequently child data) to be out of order. It also
42/// allows take / filter operations to be implemented without copying the underlying data.
43///
44/// # Representation
45///
46/// Given the same example array from [`GenericListArray`], it would be represented
47/// as such via a list-view layout array:
48///
49/// ```text
50///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
51///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
52///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
53///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │   │ 3 │     │ │ 1 │ │ A │ │ 0  │
54///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
55///  │      []     │  │ (3,0) │                   │ 1 │   │ 3 │   │ 0 │     │ │ 1 │ │ B │ │ 1  │
56///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
57///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ C │ │ 2  │
58///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
59///  │     [D]     │  │ (4,1) │                   │ 1 │   │ 4 │   │ 1 │     │ │ ? │ │ ? │ │ 3  │
60///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
61///  │  [NULL, F]  │  │ (5,2) │                   │ 1 │   │ 5 │   │ 2 │     │ │ 1 │ │ D │ │ 4  │
62///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
63///                                                                         │ │ 0 │ │ ? │ │ 5  │
64///     Logical       Logical               │  Validity  Offsets  Sizes       ├───┤ ├───┤
65///      Values       Offset                   (nulls)                      │ │ 1 │ │ F │ │ 6  │
66///                   & Size                │                                 └───┘ └───┘
67///                                                                         │    Values   │    │
68///                 (offsets[i],            │   ListViewArray                   (Array)
69///                  sizes[i])                                              └ ─ ─ ─ ─ ─ ─ ┘    │
70///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
71/// ```
72///
73/// Another way of representing the same array but taking advantage of the offsets being out of order:
74///
75/// ```text
76///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
77///                                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │
78///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐   ┌───┐       ┌───┐ ┌───┐
79///  │   [A,B,C]   │  │ (2,3) │                   │ 1 │   │ 2 │   │ 3 │     │ │ 0 │ │ ? │ │ 0  │
80///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
81///  │      []     │  │ (0,0) │                   │ 1 │   │ 0 │   │ 0 │     │ │ 1 │ │ F │ │ 1  │
82///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
83///  │    NULL     │  │ (?,?) │                   │ 0 │   │ ? │   │ ? │     │ │ 1 │ │ A │ │ 2  │
84///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
85///  │     [D]     │  │ (5,1) │                   │ 1 │   │ 5 │   │ 1 │     │ │ 1 │ │ B │ │ 3  │
86///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤   ├───┤       ├───┤ ├───┤
87///  │  [NULL, F]  │  │ (0,2) │                   │ 1 │   │ 0 │   │ 2 │     │ │ 1 │ │ C │ │ 4  │
88///  └─────────────┘  └───────┘             │     └───┘   └───┘   └───┘       ├───┤ ├───┤
89///                                                                         │ │ 1 │ │ D │ │ 5  │
90///     Logical       Logical               │  Validity  Offsets  Sizes       └───┘ └───┘
91///      Values       Offset                   (nulls)                      │    Values   │    │
92///                   & Size                │                                   (Array)
93///                                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
94///                 (offsets[i],            │   ListViewArray
95///                  sizes[i])                                                                 │
96///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
97/// ```
98///
99/// [`GenericListArray`]: crate::array::GenericListArray
100/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
101/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
102/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
103#[derive(Clone)]
104pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
105    data_type: DataType,
106    nulls: Option<NullBuffer>,
107    values: ArrayRef,
108    // Unlike GenericListArray, we do not use OffsetBuffer here as offsets are not
109    // guaranteed to be monotonically increasing.
110    value_offsets: ScalarBuffer<OffsetSize>,
111    value_sizes: ScalarBuffer<OffsetSize>,
112}
113
114impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
115    /// The data type constructor of listview array.
116    /// The input is the schema of the child array and
117    /// the output is the [`DataType`], ListView or LargeListView.
118    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
119        DataType::LargeListView
120    } else {
121        DataType::ListView
122    };
123
124    /// Create a new [`GenericListViewArray`] from the provided parts
125    ///
126    /// # Errors
127    ///
128    /// Errors if
129    ///
130    /// * `offsets.len() != sizes.len()`
131    /// * `offsets.len() != nulls.len()`
132    /// * `offsets[i] > values.len()`
133    /// * `!field.is_nullable() && values.is_nullable()`
134    /// * `field.data_type() != values.data_type()`
135    /// * `0 <= offsets[i] <= length of the child array`
136    /// * `0 <= offsets[i] + size[i] <= length of the child array`
137    pub fn try_new(
138        field: FieldRef,
139        offsets: ScalarBuffer<OffsetSize>,
140        sizes: ScalarBuffer<OffsetSize>,
141        values: ArrayRef,
142        nulls: Option<NullBuffer>,
143    ) -> Result<Self, ArrowError> {
144        let len = offsets.len();
145        if let Some(n) = nulls.as_ref() {
146            if n.len() != len {
147                return Err(ArrowError::InvalidArgumentError(format!(
148                    "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}",
149                    OffsetSize::PREFIX,
150                    n.len(),
151                )));
152            }
153        }
154        if len != sizes.len() {
155            return Err(ArrowError::InvalidArgumentError(format!(
156                "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
157                OffsetSize::PREFIX,
158                sizes.len()
159            )));
160        }
161
162        for (offset, size) in offsets.iter().zip(sizes.iter()) {
163            let offset = offset.as_usize();
164            let size = size.as_usize();
165            if offset.checked_add(size).ok_or_else(|| {
166                ArrowError::InvalidArgumentError(format!(
167                    "Overflow in offset + size for {}ListViewArray",
168                    OffsetSize::PREFIX
169                ))
170            })? > values.len()
171            {
172                return Err(ArrowError::InvalidArgumentError(format!(
173                    "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}",
174                    OffsetSize::PREFIX,
175                    values.len()
176                )));
177            }
178        }
179
180        if !field.is_nullable() && values.is_nullable() {
181            return Err(ArrowError::InvalidArgumentError(format!(
182                "Non-nullable field of {}ListViewArray {:?} cannot contain nulls",
183                OffsetSize::PREFIX,
184                field.name()
185            )));
186        }
187
188        if field.data_type() != values.data_type() {
189            return Err(ArrowError::InvalidArgumentError(format!(
190                "{}ListViewArray expected data type {} got {} for {:?}",
191                OffsetSize::PREFIX,
192                field.data_type(),
193                values.data_type(),
194                field.name()
195            )));
196        }
197
198        Ok(Self {
199            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
200            nulls,
201            values,
202            value_offsets: offsets,
203            value_sizes: sizes,
204        })
205    }
206
207    /// Create a new [`GenericListViewArray`] from the provided parts
208    ///
209    /// # Panics
210    ///
211    /// Panics if [`Self::try_new`] returns an error
212    pub fn new(
213        field: FieldRef,
214        offsets: ScalarBuffer<OffsetSize>,
215        sizes: ScalarBuffer<OffsetSize>,
216        values: ArrayRef,
217        nulls: Option<NullBuffer>,
218    ) -> Self {
219        Self::try_new(field, offsets, sizes, values, nulls).unwrap()
220    }
221
222    /// Create a new [`GenericListViewArray`] of length `len` where all values are null
223    pub fn new_null(field: FieldRef, len: usize) -> Self {
224        let values = new_empty_array(field.data_type());
225        Self {
226            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
227            nulls: Some(NullBuffer::new_null(len)),
228            value_offsets: ScalarBuffer::from(vec![]),
229            value_sizes: ScalarBuffer::from(vec![]),
230            values,
231        }
232    }
233
234    /// Deconstruct this array into its constituent parts
235    pub fn into_parts(
236        self,
237    ) -> (
238        FieldRef,
239        ScalarBuffer<OffsetSize>,
240        ScalarBuffer<OffsetSize>,
241        ArrayRef,
242        Option<NullBuffer>,
243    ) {
244        let f = match self.data_type {
245            DataType::ListView(f) | DataType::LargeListView(f) => f,
246            _ => unreachable!(),
247        };
248        (
249            f,
250            self.value_offsets,
251            self.value_sizes,
252            self.values,
253            self.nulls,
254        )
255    }
256
257    /// Returns a reference to the offsets of this list
258    ///
259    /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`]
260    /// allowing for zero-copy cloning
261    #[inline]
262    pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
263        &self.value_offsets
264    }
265
266    /// Returns a reference to the values of this list
267    #[inline]
268    pub fn values(&self) -> &ArrayRef {
269        &self.values
270    }
271
272    /// Returns a reference to the sizes of this list
273    ///
274    /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`]
275    /// allowing for zero-copy cloning
276    #[inline]
277    pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
278        &self.value_sizes
279    }
280
281    /// Returns a clone of the value type of this list.
282    pub fn value_type(&self) -> DataType {
283        self.values.data_type().clone()
284    }
285
286    /// Returns ith value of this list view array.
287    ///
288    /// Note: This method does not check for nulls and the value is arbitrary
289    /// if [`is_null`](Self::is_null) returns true for the index.
290    ///
291    /// # Safety
292    /// Caller must ensure that the index is within the array bounds
293    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
294        let offset = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
295        let length = unsafe { self.value_sizes().get_unchecked(i).as_usize() };
296        self.values.slice(offset, length)
297    }
298
299    /// Returns ith value of this list view array.
300    ///
301    /// Note: This method does not check for nulls and the value is arbitrary
302    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
303    ///
304    /// # Panics
305    /// Panics if the index is out of bounds
306    pub fn value(&self, i: usize) -> ArrayRef {
307        let offset = self.value_offsets()[i].as_usize();
308        let length = self.value_sizes()[i].as_usize();
309        self.values.slice(offset, length)
310    }
311
312    /// Returns the offset values in the offsets buffer
313    #[inline]
314    pub fn value_offsets(&self) -> &[OffsetSize] {
315        &self.value_offsets
316    }
317
318    /// Returns the sizes values in the offsets buffer
319    #[inline]
320    pub fn value_sizes(&self) -> &[OffsetSize] {
321        &self.value_sizes
322    }
323
324    /// Returns the size for value at index `i`.
325    #[inline]
326    pub fn value_size(&self, i: usize) -> OffsetSize {
327        self.value_sizes[i]
328    }
329
330    /// Returns the offset for value at index `i`.
331    pub fn value_offset(&self, i: usize) -> OffsetSize {
332        self.value_offsets[i]
333    }
334
335    /// Constructs a new iterator
336    pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> {
337        GenericListViewArrayIter::<'_, OffsetSize>::new(self)
338    }
339
340    #[inline]
341    fn get_type(data_type: &DataType) -> Option<&DataType> {
342        match (OffsetSize::IS_LARGE, data_type) {
343            (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => {
344                Some(child.data_type())
345            }
346            _ => None,
347        }
348    }
349
350    /// Returns a zero-copy slice of this array with the indicated offset and length.
351    pub fn slice(&self, offset: usize, length: usize) -> Self {
352        Self {
353            data_type: self.data_type.clone(),
354            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
355            values: self.values.clone(),
356            value_offsets: self.value_offsets.slice(offset, length),
357            value_sizes: self.value_sizes.slice(offset, length),
358        }
359    }
360}
361
362impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
363    type Item = ArrayRef;
364
365    fn value(&self, index: usize) -> Self::Item {
366        GenericListViewArray::value(self, index)
367    }
368
369    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
370        unsafe { GenericListViewArray::value_unchecked(self, index) }
371    }
372}
373
374impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
375    fn as_any(&self) -> &dyn Any {
376        self
377    }
378
379    fn to_data(&self) -> ArrayData {
380        self.clone().into()
381    }
382
383    fn into_data(self) -> ArrayData {
384        self.into()
385    }
386
387    fn data_type(&self) -> &DataType {
388        &self.data_type
389    }
390
391    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
392        Arc::new(self.slice(offset, length))
393    }
394
395    fn len(&self) -> usize {
396        self.sizes().len()
397    }
398
399    fn is_empty(&self) -> bool {
400        self.value_sizes.is_empty()
401    }
402
403    fn shrink_to_fit(&mut self) {
404        if let Some(nulls) = &mut self.nulls {
405            nulls.shrink_to_fit();
406        }
407        self.values.shrink_to_fit();
408        self.value_offsets.shrink_to_fit();
409        self.value_sizes.shrink_to_fit();
410    }
411
412    fn offset(&self) -> usize {
413        0
414    }
415
416    fn nulls(&self) -> Option<&NullBuffer> {
417        self.nulls.as_ref()
418    }
419
420    fn logical_null_count(&self) -> usize {
421        // More efficient that the default implementation
422        self.null_count()
423    }
424
425    fn get_buffer_memory_size(&self) -> usize {
426        let mut size = self.values.get_buffer_memory_size();
427        size += self.value_offsets.inner().capacity();
428        size += self.value_sizes.inner().capacity();
429        if let Some(n) = self.nulls.as_ref() {
430            size += n.buffer().capacity();
431        }
432        size
433    }
434
435    fn get_array_memory_size(&self) -> usize {
436        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
437        size += self.value_offsets.inner().capacity();
438        size += self.value_sizes.inner().capacity();
439        if let Some(n) = self.nulls.as_ref() {
440            size += n.buffer().capacity();
441        }
442        size
443    }
444}
445
446impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> {
447    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
448        let prefix = OffsetSize::PREFIX;
449        write!(f, "{prefix}ListViewArray\n[\n")?;
450        print_long_array(self, f, |array, index, f| {
451            std::fmt::Debug::fmt(&array.value(index), f)
452        })?;
453        write!(f, "]")
454    }
455}
456
457impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
458    fn from(array: GenericListViewArray<OffsetSize>) -> Self {
459        let len = array.len();
460        let builder = ArrayDataBuilder::new(array.data_type)
461            .len(len)
462            .nulls(array.nulls)
463            .buffers(vec![
464                array.value_offsets.into_inner(),
465                array.value_sizes.into_inner(),
466            ])
467            .child_data(vec![array.values.to_data()]);
468
469        unsafe { builder.build_unchecked() }
470    }
471}
472
473impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> {
474    fn from(data: ArrayData) -> Self {
475        Self::try_new_from_array_data(data)
476            .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed")
477    }
478}
479
480impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> {
481    fn from(value: FixedSizeListArray) -> Self {
482        let (field, size) = match value.data_type() {
483            DataType::FixedSizeList(f, size) => (f, *size as usize),
484            _ => unreachable!(),
485        };
486        let mut acc = 0_usize;
487        let iter = std::iter::repeat_n(size, value.len());
488        let mut sizes = Vec::with_capacity(iter.size_hint().0);
489        let mut offsets = Vec::with_capacity(iter.size_hint().0);
490
491        for size in iter {
492            offsets.push(OffsetSize::usize_as(acc));
493            acc = acc.add(size);
494            sizes.push(OffsetSize::usize_as(size));
495        }
496        let sizes = ScalarBuffer::from(sizes);
497        let offsets = ScalarBuffer::from(offsets);
498        Self {
499            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
500            nulls: value.nulls().cloned(),
501            values: value.values().clone(),
502            value_offsets: offsets,
503            value_sizes: sizes,
504        }
505    }
506}
507
508impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
509    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
510        if data.buffers().len() != 2 {
511            return Err(ArrowError::InvalidArgumentError(format!(
512                "ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
513                data.buffers().len()
514            )));
515        }
516
517        if data.child_data().len() != 1 {
518            return Err(ArrowError::InvalidArgumentError(format!(
519                "ListViewArray should contain a single child array (values array), had {}",
520                data.child_data().len()
521            )));
522        }
523
524        let values = data.child_data()[0].clone();
525
526        if let Some(child_data_type) = Self::get_type(data.data_type()) {
527            if values.data_type() != child_data_type {
528                return Err(ArrowError::InvalidArgumentError(format!(
529                    "{}ListViewArray's child datatype {:?} does not \
530                             correspond to the List's datatype {:?}",
531                    OffsetSize::PREFIX,
532                    values.data_type(),
533                    child_data_type
534                )));
535            }
536        } else {
537            return Err(ArrowError::InvalidArgumentError(format!(
538                "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
539                OffsetSize::PREFIX,
540                OffsetSize::PREFIX,
541                data.data_type()
542            )));
543        }
544
545        let values = make_array(values);
546        // ArrayData is valid, and verified type above
547        let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
548        let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
549
550        Ok(Self {
551            data_type: data.data_type().clone(),
552            nulls: data.nulls().cloned(),
553            values,
554            value_offsets,
555            value_sizes,
556        })
557    }
558}
559
560#[cfg(test)]
561mod tests {
562    use arrow_buffer::{BooleanBuffer, Buffer, ScalarBuffer, bit_util};
563    use arrow_schema::Field;
564
565    use crate::builder::{FixedSizeListBuilder, Int32Builder};
566    use crate::cast::AsArray;
567    use crate::types::Int32Type;
568    use crate::{Int32Array, Int64Array};
569
570    use super::*;
571
572    #[test]
573    fn test_empty_list_view_array() {
574        // Construct an empty value array
575        let vec: Vec<i32> = vec![];
576        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
577        let sizes = ScalarBuffer::from(vec![]);
578        let offsets = ScalarBuffer::from(vec![]);
579        let values = Int32Array::from(vec);
580        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
581
582        assert_eq!(list_array.len(), 0)
583    }
584
585    #[test]
586    fn test_list_view_array() {
587        // Construct a value array
588        let value_data = ArrayData::builder(DataType::Int32)
589            .len(8)
590            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
591            .build()
592            .unwrap();
593
594        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
595        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
596        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
597        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
598        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
599
600        let values = list_array.values();
601        assert_eq!(value_data, values.to_data());
602        assert_eq!(DataType::Int32, list_array.value_type());
603        assert_eq!(3, list_array.len());
604        assert_eq!(0, list_array.null_count());
605        assert_eq!(6, list_array.value_offsets()[2]);
606        assert_eq!(2, list_array.value_sizes()[2]);
607        assert_eq!(2, list_array.value_size(2));
608        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
609        assert_eq!(
610            0,
611            unsafe { list_array.value_unchecked(0) }
612                .as_primitive::<Int32Type>()
613                .value(0)
614        );
615        for i in 0..3 {
616            assert!(list_array.is_valid(i));
617            assert!(!list_array.is_null(i));
618        }
619    }
620
621    #[test]
622    fn test_large_list_view_array() {
623        // Construct a value array
624        let value_data = ArrayData::builder(DataType::Int32)
625            .len(8)
626            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
627            .build()
628            .unwrap();
629
630        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
631        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
632        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
633        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
634        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
635
636        let values = list_array.values();
637        assert_eq!(value_data, values.to_data());
638        assert_eq!(DataType::Int32, list_array.value_type());
639        assert_eq!(3, list_array.len());
640        assert_eq!(0, list_array.null_count());
641        assert_eq!(6, list_array.value_offsets()[2]);
642        assert_eq!(2, list_array.value_sizes()[2]);
643        assert_eq!(2, list_array.value_size(2));
644        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
645        assert_eq!(
646            0,
647            unsafe { list_array.value_unchecked(0) }
648                .as_primitive::<Int32Type>()
649                .value(0)
650        );
651        for i in 0..3 {
652            assert!(list_array.is_valid(i));
653            assert!(!list_array.is_null(i));
654        }
655    }
656
657    #[test]
658    fn test_list_view_array_slice() {
659        // Construct a value array
660        let value_data = ArrayData::builder(DataType::Int32)
661            .len(10)
662            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
663            .build()
664            .unwrap();
665
666        // 01011001 00000001
667        let mut null_bits: [u8; 2] = [0; 2];
668        bit_util::set_bit(&mut null_bits, 0);
669        bit_util::set_bit(&mut null_bits, 3);
670        bit_util::set_bit(&mut null_bits, 4);
671        bit_util::set_bit(&mut null_bits, 6);
672        bit_util::set_bit(&mut null_bits, 8);
673        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
674        let null_buffer = NullBuffer::new(buffer);
675
676        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
677        let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]);
678        let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]);
679        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
680        let list_array =
681            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
682
683        let values = list_array.values();
684        assert_eq!(value_data, values.to_data());
685        assert_eq!(DataType::Int32, list_array.value_type());
686        assert_eq!(9, list_array.len());
687        assert_eq!(4, list_array.null_count());
688        assert_eq!(2, list_array.value_offsets()[3]);
689        assert_eq!(2, list_array.value_sizes()[3]);
690        assert_eq!(2, list_array.value_size(3));
691
692        let sliced_array = list_array.slice(1, 6);
693        assert_eq!(6, sliced_array.len());
694        assert_eq!(3, sliced_array.null_count());
695
696        for i in 0..sliced_array.len() {
697            if bit_util::get_bit(&null_bits, 1 + i) {
698                assert!(sliced_array.is_valid(i));
699            } else {
700                assert!(sliced_array.is_null(i));
701            }
702        }
703
704        // Check offset and length for each non-null value.
705        let sliced_list_array = sliced_array
706            .as_any()
707            .downcast_ref::<ListViewArray>()
708            .unwrap();
709        assert_eq!(2, sliced_list_array.value_offsets()[2]);
710        assert_eq!(2, sliced_list_array.value_sizes()[2]);
711        assert_eq!(2, sliced_list_array.value_size(2));
712
713        assert_eq!(4, sliced_list_array.value_offsets()[3]);
714        assert_eq!(2, sliced_list_array.value_sizes()[3]);
715        assert_eq!(2, sliced_list_array.value_size(3));
716
717        assert_eq!(6, sliced_list_array.value_offsets()[5]);
718        assert_eq!(3, sliced_list_array.value_sizes()[5]);
719        assert_eq!(3, sliced_list_array.value_size(5));
720    }
721
722    #[test]
723    fn test_large_list_view_array_slice() {
724        // Construct a value array
725        let value_data = ArrayData::builder(DataType::Int32)
726            .len(10)
727            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
728            .build()
729            .unwrap();
730
731        // 01011001 00000001
732        let mut null_bits: [u8; 2] = [0; 2];
733        bit_util::set_bit(&mut null_bits, 0);
734        bit_util::set_bit(&mut null_bits, 3);
735        bit_util::set_bit(&mut null_bits, 4);
736        bit_util::set_bit(&mut null_bits, 6);
737        bit_util::set_bit(&mut null_bits, 8);
738        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
739        let null_buffer = NullBuffer::new(buffer);
740
741        // Construct a large list view array from the above two
742        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
743        let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]);
744        let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]);
745        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
746        let list_array =
747            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
748
749        let values = list_array.values();
750        assert_eq!(value_data, values.to_data());
751        assert_eq!(DataType::Int32, list_array.value_type());
752        assert_eq!(9, list_array.len());
753        assert_eq!(4, list_array.null_count());
754        assert_eq!(2, list_array.value_offsets()[3]);
755        assert_eq!(2, list_array.value_sizes()[3]);
756        assert_eq!(2, list_array.value_size(3));
757
758        let sliced_array = list_array.slice(1, 6);
759        assert_eq!(6, sliced_array.len());
760        assert_eq!(3, sliced_array.null_count());
761
762        for i in 0..sliced_array.len() {
763            if bit_util::get_bit(&null_bits, 1 + i) {
764                assert!(sliced_array.is_valid(i));
765            } else {
766                assert!(sliced_array.is_null(i));
767            }
768        }
769
770        // Check offset and length for each non-null value.
771        let sliced_list_array = sliced_array
772            .as_any()
773            .downcast_ref::<LargeListViewArray>()
774            .unwrap();
775        assert_eq!(2, sliced_list_array.value_offsets()[2]);
776        assert_eq!(2, sliced_list_array.value_size(2));
777        assert_eq!(2, sliced_list_array.value_sizes()[2]);
778
779        assert_eq!(4, sliced_list_array.value_offsets()[3]);
780        assert_eq!(2, sliced_list_array.value_size(3));
781        assert_eq!(2, sliced_list_array.value_sizes()[3]);
782
783        assert_eq!(6, sliced_list_array.value_offsets()[5]);
784        assert_eq!(3, sliced_list_array.value_size(5));
785        assert_eq!(2, sliced_list_array.value_sizes()[3]);
786    }
787
788    #[test]
789    #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")]
790    fn test_list_view_array_index_out_of_bound() {
791        // 01011001 00000001
792        let mut null_bits: [u8; 2] = [0; 2];
793        bit_util::set_bit(&mut null_bits, 0);
794        bit_util::set_bit(&mut null_bits, 3);
795        bit_util::set_bit(&mut null_bits, 4);
796        bit_util::set_bit(&mut null_bits, 6);
797        bit_util::set_bit(&mut null_bits, 8);
798        let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9);
799        let null_buffer = NullBuffer::new(buffer);
800
801        // Construct a buffer for value offsets, for the nested array:
802        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
803        // Construct a list array from the above two
804        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
805        let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]);
806        let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]);
807        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
808        let list_array =
809            ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer));
810
811        assert_eq!(9, list_array.len());
812        list_array.value(10);
813    }
814    #[test]
815    #[should_panic(
816        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0"
817    )]
818    #[cfg(not(feature = "force_validate"))]
819    fn test_list_view_array_invalid_buffer_len() {
820        let value_data = unsafe {
821            ArrayData::builder(DataType::Int32)
822                .len(8)
823                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
824                .build_unchecked()
825        };
826        let list_data_type =
827            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
828        let list_data = unsafe {
829            ArrayData::builder(list_data_type)
830                .len(3)
831                .add_child_data(value_data)
832                .build_unchecked()
833        };
834        drop(ListViewArray::from(list_data));
835    }
836
837    #[test]
838    #[should_panic(
839        expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1"
840    )]
841    #[cfg(not(feature = "force_validate"))]
842    fn test_list_view_array_invalid_child_array_len() {
843        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
844        let list_data_type =
845            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
846        let list_data = unsafe {
847            ArrayData::builder(list_data_type)
848                .len(3)
849                .add_buffer(value_offsets)
850                .build_unchecked()
851        };
852        drop(ListViewArray::from(list_data));
853    }
854
855    #[test]
856    fn test_list_view_array_offsets_need_not_start_at_zero() {
857        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
858        let sizes = ScalarBuffer::from(vec![0i32, 0, 3]);
859        let offsets = ScalarBuffer::from(vec![2i32, 2, 5]);
860        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
861        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
862
863        assert_eq!(list_array.value_size(0), 0);
864        assert_eq!(list_array.value_size(1), 0);
865        assert_eq!(list_array.value_size(2), 3);
866    }
867
868    #[test]
869    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
870    #[cfg(not(feature = "force_validate"))]
871    fn test_list_view_array_alignment() {
872        let offset_buf = Buffer::from_slice_ref([0_u64]);
873        let offset_buf2 = offset_buf.slice(1);
874
875        let size_buf = Buffer::from_slice_ref([0_u64]);
876        let size_buf2 = size_buf.slice(1);
877
878        let values: [i32; 8] = [0; 8];
879        let value_data = unsafe {
880            ArrayData::builder(DataType::Int32)
881                .add_buffer(Buffer::from_slice_ref(values))
882                .build_unchecked()
883        };
884
885        let list_data_type =
886            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
887        let list_data = unsafe {
888            ArrayData::builder(list_data_type)
889                .add_buffer(offset_buf2)
890                .add_buffer(size_buf2)
891                .add_child_data(value_data)
892                .build_unchecked()
893        };
894        drop(ListViewArray::from(list_data));
895    }
896
897    #[test]
898    fn test_empty_offsets() {
899        let f = Arc::new(Field::new("element", DataType::Int32, true));
900        let string = ListViewArray::from(
901            ArrayData::builder(DataType::ListView(f.clone()))
902                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
903                .add_child_data(ArrayData::new_empty(&DataType::Int32))
904                .build()
905                .unwrap(),
906        );
907        assert_eq!(string.value_offsets(), &[] as &[i32; 0]);
908        assert_eq!(string.value_sizes(), &[] as &[i32; 0]);
909
910        let string = LargeListViewArray::from(
911            ArrayData::builder(DataType::LargeListView(f))
912                .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
913                .add_child_data(ArrayData::new_empty(&DataType::Int32))
914                .build()
915                .unwrap(),
916        );
917        assert_eq!(string.len(), 0);
918        assert_eq!(string.value_offsets(), &[] as &[i64; 0]);
919        assert_eq!(string.value_sizes(), &[] as &[i64; 0]);
920    }
921
922    #[test]
923    fn test_try_new() {
924        let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
925        let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
926        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
927        let values = Arc::new(values) as ArrayRef;
928
929        let field = Arc::new(Field::new("element", DataType::Int32, false));
930        ListViewArray::new(
931            field.clone(),
932            offsets.clone(),
933            sizes.clone(),
934            values.clone(),
935            None,
936        );
937
938        let nulls = NullBuffer::new_null(4);
939        ListViewArray::new(
940            field.clone(),
941            offsets,
942            sizes.clone(),
943            values.clone(),
944            Some(nulls),
945        );
946
947        let nulls = NullBuffer::new_null(4);
948        let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
949        let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
950        let err = LargeListViewArray::try_new(
951            field,
952            offsets.clone(),
953            sizes.clone(),
954            values.clone(),
955            Some(nulls),
956        )
957        .unwrap_err();
958
959        assert_eq!(
960            err.to_string(),
961            "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4"
962        );
963
964        let field = Arc::new(Field::new("element", DataType::Int64, false));
965        let err = LargeListViewArray::try_new(
966            field.clone(),
967            offsets.clone(),
968            sizes.clone(),
969            values.clone(),
970            None,
971        )
972        .unwrap_err();
973
974        assert_eq!(
975            err.to_string(),
976            "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\""
977        );
978
979        let nulls = NullBuffer::new_null(7);
980        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
981        let values = Arc::new(values);
982
983        let err = LargeListViewArray::try_new(
984            field,
985            offsets.clone(),
986            sizes.clone(),
987            values.clone(),
988            None,
989        )
990        .unwrap_err();
991
992        assert_eq!(
993            err.to_string(),
994            "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls"
995        );
996    }
997
998    #[test]
999    fn test_from_fixed_size_list() {
1000        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1001        builder.values().append_slice(&[1, 2, 3]);
1002        builder.append(true);
1003        builder.values().append_slice(&[0, 0, 0]);
1004        builder.append(false);
1005        builder.values().append_slice(&[4, 5, 6]);
1006        builder.append(true);
1007        let list: ListViewArray = builder.finish().into();
1008        let values: Vec<_> = list
1009            .iter()
1010            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1011            .collect();
1012        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
1013        let offsets = list.value_offsets();
1014        assert_eq!(offsets, &[0, 3, 6]);
1015        let sizes = list.value_sizes();
1016        assert_eq!(sizes, &[3, 3, 3]);
1017    }
1018
1019    #[test]
1020    fn test_list_view_array_overlap_lists() {
1021        let value_data = unsafe {
1022            ArrayData::builder(DataType::Int32)
1023                .len(8)
1024                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1025                .build_unchecked()
1026        };
1027        let list_data_type =
1028            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1029        let list_data = unsafe {
1030            ArrayData::builder(list_data_type)
1031                .len(2)
1032                .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets
1033                .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes
1034                .add_child_data(value_data)
1035                .build_unchecked()
1036        };
1037        let array = ListViewArray::from(list_data);
1038
1039        assert_eq!(array.len(), 2);
1040        assert_eq!(array.value_size(0), 5);
1041        assert_eq!(array.value_size(1), 5);
1042
1043        let values: Vec<_> = array
1044            .iter()
1045            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1046            .collect();
1047        assert_eq!(
1048            values,
1049            vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])]
1050        );
1051    }
1052
1053    #[test]
1054    fn test_list_view_array_incomplete_offsets() {
1055        let value_data = unsafe {
1056            ArrayData::builder(DataType::Int32)
1057                .len(50)
1058                .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>()))
1059                .build_unchecked()
1060        };
1061        let list_data_type =
1062            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1063        let list_data = unsafe {
1064            ArrayData::builder(list_data_type)
1065                .len(3)
1066                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets
1067                .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes
1068                .add_child_data(value_data)
1069                .build_unchecked()
1070        };
1071        let array = ListViewArray::from(list_data);
1072
1073        assert_eq!(array.len(), 3);
1074        assert_eq!(array.value_size(0), 0);
1075        assert_eq!(array.value_size(1), 5);
1076        assert_eq!(array.value_size(2), 10);
1077
1078        let values: Vec<_> = array
1079            .iter()
1080            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1081            .collect();
1082        assert_eq!(
1083            values,
1084            vec![
1085                Some(vec![]),
1086                Some(vec![5, 6, 7, 8, 9]),
1087                Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
1088            ]
1089        );
1090    }
1091
1092    #[test]
1093    fn test_list_view_array_empty_lists() {
1094        let value_data = unsafe {
1095            ArrayData::builder(DataType::Int32)
1096                .len(0)
1097                .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[]))
1098                .build_unchecked()
1099        };
1100        let list_data_type =
1101            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false)));
1102        let list_data = unsafe {
1103            ArrayData::builder(list_data_type)
1104                .len(3)
1105                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets
1106                .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes
1107                .add_child_data(value_data)
1108                .build_unchecked()
1109        };
1110        let array = ListViewArray::from(list_data);
1111
1112        assert_eq!(array.len(), 3);
1113        assert_eq!(array.value_size(0), 0);
1114        assert_eq!(array.value_size(1), 0);
1115        assert_eq!(array.value_size(2), 0);
1116
1117        let values: Vec<_> = array
1118            .iter()
1119            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1120            .collect();
1121        assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
1122    }
1123}