Skip to main content

arrow_array/array/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::{get_offsets_from_buffer, make_array, print_long_array};
19use crate::builder::{ArrayBuilder, GenericListBuilder, PrimitiveBuilder};
20use crate::{
21    Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22    iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31/// A type that can be used within a variable-size array to encode offset information
32///
33/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`],
34/// [`StringArray`] and [`LargeStringArray`]
35///
36/// [`BinaryArray`]: crate::array::BinaryArray
37/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
38/// [`StringArray`]: crate::array::StringArray
39/// [`LargeStringArray`]: crate::array::LargeStringArray
pub trait OffsetSizeTrait:
    ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
{
    /// True for 64 bit offset size and false for 32 bit offset size
    ///
    /// List arrays use this to choose between the `List` and `LargeList` data types.
    const IS_LARGE: bool;
    /// Prefix for the offset size, prepended to array names in messages
    /// (empty for 32 bit offsets, `"Large"` for 64 bit offsets)
    const PREFIX: &'static str;
    /// The max offset value of the implementing type, expressed as a `usize`
    const MAX_OFFSET: usize;
}
50
// 32 bit offsets: the offset type behind `ListArray`.
impl OffsetSizeTrait for i32 {
    const IS_LARGE: bool = false;
    // 32 bit variants carry no name prefix
    const PREFIX: &'static str = "";
    const MAX_OFFSET: usize = i32::MAX as usize;
}
56
// 64 bit offsets: the offset type behind `LargeListArray`.
impl OffsetSizeTrait for i64 {
    const IS_LARGE: bool = true;
    // 64 bit variants are named with a "Large" prefix
    const PREFIX: &'static str = "Large";
    // Note: an `as` cast truncates on targets where `usize` is narrower than 64 bits
    const MAX_OFFSET: usize = i64::MAX as usize;
}
62
63/// An array of [variable length lists], similar to JSON arrays
64/// (e.g. `["A", "B", "C"]`). This struct specifically represents
65/// the [list layout]. Refer to [`GenericListViewArray`] for the
66/// [list-view layout].
67///
68/// Lists are represented using `offsets` into a `values` child
69/// array. Offsets are stored in two adjacent entries of an
70/// [`OffsetBuffer`].
71///
72/// Arrow defines [`ListArray`] with `i32` offsets and
73/// [`LargeListArray`] with `i64` offsets.
74///
75/// Use [`GenericListBuilder`] to construct a [`GenericListArray`].
76///
77/// # Representation
78///
79/// A [`ListArray`] can represent a list of values of any other
80/// supported Arrow type. Each element of the `ListArray` itself is
81/// a list which may be empty, may contain NULL and non-null values,
82/// or may itself be NULL.
83///
84/// For example, the `ListArray` shown in the following diagram stores
85/// lists of strings. Note that `[]` represents an empty (length
86/// 0), but non NULL list.
87///
88/// ```text
89/// ┌─────────────┐
90/// │   [A,B,C]   │
91/// ├─────────────┤
92/// │     []      │
93/// ├─────────────┤
94/// │    NULL     │
95/// ├─────────────┤
96/// │     [D]     │
97/// ├─────────────┤
98/// │  [NULL, F]  │
99/// └─────────────┘
100/// ```
101///
102/// The `values` are stored in a child [`StringArray`] and the offsets
103/// are stored in an [`OffsetBuffer`] as shown in the following
104/// diagram. The logical values and offsets are shown on the left, and
105/// the actual `ListArray` encoding on the right.
106///
107/// ```text
108///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
109///                                                                 ┌ ─ ─ ─ ─ ─ ─ ┐    │
110///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
111///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
112///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
113///  │ [] (empty)  │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
114///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
115///  │    NULL     │  │ (3,3) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
116///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
117///  │     [D]     │  │ (3,4) │                   │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
118///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
119///  │  [NULL, F]  │  │ (4,6) │                   │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 4  │
120///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
121///                                                       │ 6 │     │ │ 1 │ │ F │ │ 5  │
122///                                         │  Validity   └───┘       └───┘ └───┘
123///     Logical       Logical                  (nulls)   Offsets    │    Values   │    │
124///      Values       Offsets               │                           (Array)
125///                                                                 └ ─ ─ ─ ─ ─ ─ ┘    │
126///                 (offsets[i],            │   ListArray
127///                offsets[i+1])                                                       │
128///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
129/// ```
130///
131/// # Slicing
132///
133/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
134/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
135///
136/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
137/// would result in the following. Note
138///
139/// 1. `Values` array is unchanged
140/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
141///
142/// ```text
143///                                 ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
144///                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │  ╔═══╗
145///                                 │                         ╔═══╗ ╔═══╗         ║   ║  Not used
146///                                                         │ ║ 1 ║ ║ A ║ │ 0  │  ╚═══╝
147///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
148///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
149///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
150///  │    NULL     │  │ (3,3) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
151///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╚═══╝ ╚═══╝
152///  │     [D]     │  │ (3,4) │           │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
153///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ╔═══╗ ╔═══╗
154///                                               │ 4 │     │ ║ 0 ║ ║ ? ║ │ 4  │
155///                                 │             └───┘       ╠═══╣ ╠═══╣
156///                                                         │ ║ 1 ║ ║ F ║ │ 5  │
157///                                 │  Validity               ╚═══╝ ╚═══╝
158///     Logical       Logical          (nulls)   Offsets    │    Values   │    │
159///      Values       Offsets       │                           (Array)
160///                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
161///                 (offsets[i],    │   ListArray
162///                offsets[i+1])                                               │
163///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
164/// ```
165///
166/// [`StringArray`]: crate::array::StringArray
167/// [`GenericListViewArray`]: crate::array::GenericListViewArray
168/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
169/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
170/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
    /// `DataType::List` or `DataType::LargeList`, wrapping the child field
    data_type: DataType,
    /// Optional validity of each list
    nulls: Option<NullBuffer>,
    /// Child array holding the elements of the lists
    values: ArrayRef,
    /// Offsets into `values`; list `i` spans `value_offsets[i]..value_offsets[i + 1]`
    value_offsets: OffsetBuffer<OffsetSize>,
}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179    fn clone(&self) -> Self {
180        Self {
181            data_type: self.data_type.clone(),
182            nulls: self.nulls.clone(),
183            values: self.values.clone(),
184            value_offsets: self.value_offsets.clone(),
185        }
186    }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190    /// The data type constructor of list array.
191    /// The input is the schema of the child array and
192    /// the output is the [`DataType`], List or LargeList.
193    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194        DataType::LargeList
195    } else {
196        DataType::List
197    };
198
199    /// Create a new [`GenericListArray`] from the provided parts
200    ///
201    /// # Errors
202    ///
203    /// Errors if
204    ///
205    /// * `offsets.len() - 1 != nulls.len()`
206    /// * `offsets.last() > values.len()`
207    /// * `!field.is_nullable() && values.is_nullable()`
208    /// * `field.data_type() != values.data_type()`
209    pub fn try_new(
210        field: FieldRef,
211        offsets: OffsetBuffer<OffsetSize>,
212        values: ArrayRef,
213        nulls: Option<NullBuffer>,
214    ) -> Result<Self, ArrowError> {
215        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
216        let end_offset = offsets.last().unwrap().as_usize();
217        // don't need to check other values of `offsets` because they are checked
218        // during construction of `OffsetBuffer`
219        if end_offset > values.len() {
220            return Err(ArrowError::InvalidArgumentError(format!(
221                "Max offset of {end_offset} exceeds length of values {}",
222                values.len()
223            )));
224        }
225
226        if let Some(n) = nulls.as_ref() {
227            if n.len() != len {
228                return Err(ArrowError::InvalidArgumentError(format!(
229                    "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230                    OffsetSize::PREFIX,
231                    n.len(),
232                )));
233            }
234        }
235        if !field.is_nullable() && values.is_nullable() {
236            return Err(ArrowError::InvalidArgumentError(format!(
237                "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238                OffsetSize::PREFIX,
239                field.name()
240            )));
241        }
242
243        if field.data_type() != values.data_type() {
244            return Err(ArrowError::InvalidArgumentError(format!(
245                "{}ListArray expected data type {} got {} for {:?}",
246                OffsetSize::PREFIX,
247                field.data_type(),
248                values.data_type(),
249                field.name()
250            )));
251        }
252
253        Ok(Self {
254            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255            nulls,
256            values,
257            value_offsets: offsets,
258        })
259    }
260
261    /// Create a new [`GenericListArray`] from the provided parts
262    ///
263    /// # Panics
264    ///
265    /// Panics if [`Self::try_new`] returns an error
266    pub fn new(
267        field: FieldRef,
268        offsets: OffsetBuffer<OffsetSize>,
269        values: ArrayRef,
270        nulls: Option<NullBuffer>,
271    ) -> Self {
272        Self::try_new(field, offsets, values, nulls).unwrap()
273    }
274
275    /// Create a new [`GenericListArray`] of length `len` where all values are null
276    pub fn new_null(field: FieldRef, len: usize) -> Self {
277        let values = new_empty_array(field.data_type());
278        Self {
279            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280            nulls: Some(NullBuffer::new_null(len)),
281            value_offsets: OffsetBuffer::new_zeroed(len),
282            values,
283        }
284    }
285
286    /// Deconstruct this array into its constituent parts
287    pub fn into_parts(
288        self,
289    ) -> (
290        FieldRef,
291        OffsetBuffer<OffsetSize>,
292        ArrayRef,
293        Option<NullBuffer>,
294    ) {
295        let f = match self.data_type {
296            DataType::List(f) | DataType::LargeList(f) => f,
297            _ => unreachable!(),
298        };
299        (f, self.value_offsets, self.values, self.nulls)
300    }
301
    /// Returns a reference to the offsets of this list
    ///
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
    /// allowing for zero-copy cloning.
    ///
    /// The buffer holds one more entry than the array has elements: list `i`
    /// spans `offsets[i]..offsets[i + 1]`.
    ///
    /// Notes: The `offsets` may not start at 0 and may not cover all values in
    /// [`Self::values`]. This can happen when the list array was sliced via
    /// [`Self::slice`]. See documentation for [`Self`] for more details.
    #[inline]
    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
        &self.value_offsets
    }
314
    /// Returns a reference to the values of this list
    ///
    /// This is the whole child array, not the elements of any single list;
    /// use [`Self::value`] to obtain the elements of one list.
    ///
    /// Note: The list array may not refer to all values in the `values` array.
    /// For example if the list array was sliced via [`Self::slice`] values will
    /// still contain values both before and after the slice. See documentation
    /// for [`Self`] for more details.
    #[inline]
    pub fn values(&self) -> &ArrayRef {
        &self.values
    }
325
326    /// Returns a clone of the value type of this list.
327    pub fn value_type(&self) -> DataType {
328        self.values.data_type().clone()
329    }
330
331    /// Returns ith value of this list array.
332    ///
333    /// Note: This method does not check for nulls and the value is arbitrary
334    /// if [`is_null`](Self::is_null) returns true for the index.
335    ///
336    /// # Safety
337    /// Caller must ensure that the index is within the array bounds
338    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339        let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340        let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341        self.values.slice(start, end - start)
342    }
343
344    /// Returns ith value of this list array.
345    ///
346    /// Note: This method does not check for nulls and the value is arbitrary
347    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
348    ///
349    /// # Panics
350    /// Panics if index `i` is out of bounds
351    pub fn value(&self, i: usize) -> ArrayRef {
352        let end = self.value_offsets()[i + 1].as_usize();
353        let start = self.value_offsets()[i].as_usize();
354        self.values.slice(start, end - start)
355    }
356
    /// Returns the offset values in the offsets buffer.
    ///
    /// The returned slice is a borrowed view of the same offsets that
    /// [`Self::offsets`] exposes as an [`OffsetBuffer`].
    ///
    /// See [`Self::offsets`] for more details.
    #[inline]
    pub fn value_offsets(&self) -> &[OffsetSize] {
        &self.value_offsets
    }
364
365    /// Returns the length for value at index `i`.
366    #[inline]
367    pub fn value_length(&self, i: usize) -> OffsetSize {
368        let offsets = self.value_offsets();
369        offsets[i + 1] - offsets[i]
370    }
371
372    /// constructs a new iterator
373    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374        GenericListArrayIter::<'a, OffsetSize>::new(self)
375    }
376
377    #[inline]
378    fn get_type(data_type: &DataType) -> Option<&DataType> {
379        match (OffsetSize::IS_LARGE, data_type) {
380            (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381                Some(child.data_type())
382            }
383            _ => None,
384        }
385    }
386
387    /// Returns a zero-copy slice of this array with the indicated offset and length.
388    ///
389    /// Notes: this method does *NOT* slice the underlying values array or modify
390    /// the values in the offsets buffer. See [`Self::values`] and
391    /// [`Self::offsets`] for more information.
392    pub fn slice(&self, offset: usize, length: usize) -> Self {
393        Self {
394            data_type: self.data_type.clone(),
395            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396            values: self.values.clone(),
397            value_offsets: self.value_offsets.slice(offset, length),
398        }
399    }
400
401    /// Creates a [`GenericListArray`] from an iterator of primitive values
402    /// # Example
403    /// ```
404    /// # use arrow_array::ListArray;
405    /// # use arrow_array::types::Int32Type;
406    ///
407    /// let data = vec![
408    ///    Some(vec![Some(0), Some(1), Some(2)]),
409    ///    None,
410    ///    Some(vec![Some(3), None, Some(5)]),
411    ///    Some(vec![Some(6), Some(7)]),
412    /// ];
413    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
414    /// println!("{:?}", list_array);
415    /// ```
416    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417    where
418        T: ArrowPrimitiveType,
419        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420        I: IntoIterator<Item = Option<P>>,
421    {
422        Self::from_nested_iter::<PrimitiveBuilder<T>, T::Native, P, I>(iter)
423    }
424
425    /// Creates a [`GenericListArray`] from a nested iterator of values.
426    /// This method works for any values type that has a corresponding builder that implements the
427    /// `Extend` trait. That includes all numeric types, booleans, binary and string types and also
428    /// dictionary encoded binary and strings.
429    ///
430    /// # Example
431    /// ```
432    /// # use arrow_array::ListArray;
433    /// # use arrow_array::types::Int32Type;
434    /// # use arrow_array::builder::StringDictionaryBuilder;
435    /// let data = vec![
436    ///    Some(vec![Some("foo"), Some("bar"), Some("baz")]),
437    ///    None,
438    ///    Some(vec![Some("bar"), None, Some("foo")]),
439    ///    Some(vec![]),
440    /// ];
441    /// let list_array = ListArray::from_nested_iter::<StringDictionaryBuilder<Int32Type>, _, _, _>(data);
442    /// println!("{:?}", list_array);
443    /// ```
444    pub fn from_nested_iter<B, T, P, I>(iter: I) -> Self
445    where
446        B: ArrayBuilder + Default + Extend<Option<T>>,
447        P: IntoIterator<Item = Option<T>>,
448        I: IntoIterator<Item = Option<P>>,
449    {
450        let iter = iter.into_iter();
451        let size_hint = iter.size_hint().0;
452        let mut builder = GenericListBuilder::with_capacity(B::default(), size_hint);
453
454        for i in iter {
455            match i {
456                Some(p) => {
457                    builder.values().extend(p);
458                    builder.append(true);
459                }
460                None => builder.append(false),
461            }
462        }
463        builder.finish()
464    }
465}
466
467impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
468    fn from(data: ArrayData) -> Self {
469        Self::try_new_from_array_data(data)
470            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
471    }
472}
473
474impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
475    fn from(array: GenericListArray<OffsetSize>) -> Self {
476        let len = array.len();
477        let builder = ArrayDataBuilder::new(array.data_type)
478            .len(len)
479            .nulls(array.nulls)
480            .buffers(vec![array.value_offsets.into_inner().into_inner()])
481            .child_data(vec![array.values.to_data()]);
482
483        unsafe { builder.build_unchecked() }
484    }
485}
486
487impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
488    fn from(value: FixedSizeListArray) -> Self {
489        let (field, size) = match value.data_type() {
490            DataType::FixedSizeList(f, size) => (f, *size as usize),
491            _ => unreachable!(),
492        };
493
494        let offsets = OffsetBuffer::from_repeated_length(size, value.len());
495
496        Self {
497            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
498            nulls: value.nulls().cloned(),
499            values: value.values().clone(),
500            value_offsets: offsets,
501        }
502    }
503}
504
505impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
506    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
507        let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
508
509        if buffers.len() != 1 {
510            return Err(ArrowError::InvalidArgumentError(format!(
511                "ListArray data should contain a single buffer only (value offsets), had {}",
512                buffers.len()
513            )));
514        }
515        let buffer = buffers.pop().expect("checked above");
516
517        if child_data.len() != 1 {
518            return Err(ArrowError::InvalidArgumentError(format!(
519                "ListArray should contain a single child array (values array), had {}",
520                child_data.len()
521            )));
522        }
523
524        let values = child_data.pop().expect("checked above");
525
526        if let Some(child_data_type) = Self::get_type(&data_type) {
527            if values.data_type() != child_data_type {
528                return Err(ArrowError::InvalidArgumentError(format!(
529                    "[Large]ListArray's child datatype {:?} does not \
530                             correspond to the List's datatype {:?}",
531                    values.data_type(),
532                    child_data_type
533                )));
534            }
535        } else {
536            return Err(ArrowError::InvalidArgumentError(format!(
537                "[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
538            )));
539        }
540
541        let values = make_array(values);
542        // SAFETY:
543        // ArrayData is valid, and verified type above
544        let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
545
546        Ok(Self {
547            data_type,
548            nulls,
549            values,
550            value_offsets,
551        })
552    }
553}
554
555/// SAFETY: Correctly implements the contract of Arrow Arrays
556unsafe impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
557    fn as_any(&self) -> &dyn Any {
558        self
559    }
560
561    fn to_data(&self) -> ArrayData {
562        self.clone().into()
563    }
564
565    fn into_data(self) -> ArrayData {
566        self.into()
567    }
568
569    fn data_type(&self) -> &DataType {
570        &self.data_type
571    }
572
573    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
574        Arc::new(self.slice(offset, length))
575    }
576
577    fn len(&self) -> usize {
578        self.value_offsets.len() - 1
579    }
580
581    fn is_empty(&self) -> bool {
582        self.value_offsets.len() <= 1
583    }
584
585    fn shrink_to_fit(&mut self) {
586        if let Some(nulls) = &mut self.nulls {
587            nulls.shrink_to_fit();
588        }
589        self.values.shrink_to_fit();
590        self.value_offsets.shrink_to_fit();
591    }
592
593    fn offset(&self) -> usize {
594        0
595    }
596
597    fn nulls(&self) -> Option<&NullBuffer> {
598        self.nulls.as_ref()
599    }
600
601    fn logical_null_count(&self) -> usize {
602        // More efficient that the default implementation
603        self.null_count()
604    }
605
606    fn get_buffer_memory_size(&self) -> usize {
607        let mut size = self.values.get_buffer_memory_size();
608        size += self.value_offsets.inner().inner().capacity();
609        if let Some(n) = self.nulls.as_ref() {
610            size += n.buffer().capacity();
611        }
612        size
613    }
614
615    fn get_array_memory_size(&self) -> usize {
616        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
617        size += self.value_offsets.inner().inner().capacity();
618        if let Some(n) = self.nulls.as_ref() {
619            size += n.buffer().capacity();
620        }
621        size
622    }
623
624    #[cfg(feature = "pool")]
625    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
626        self.value_offsets.claim(pool);
627        self.values.claim(pool);
628        if let Some(nulls) = &self.nulls {
629            nulls.claim(pool);
630        }
631    }
632}
633
634impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for GenericListArray<OffsetSize> {
635    fn values(&self) -> &ArrayRef {
636        self.values()
637    }
638
639    fn element_range(&self, index: usize) -> std::ops::Range<usize> {
640        let offsets = self.offsets();
641        let start = offsets[index].as_usize();
642        let end = offsets[index + 1].as_usize();
643        start..end
644    }
645}
646
// Element access for borrowed list arrays: each item is the `ArrayRef`
// returned by the inherent `GenericListArray::value`.
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
    type Item = ArrayRef;

    // Forwards to the inherent, bounds-checked `GenericListArray::value`,
    // named by its explicit path.
    fn value(&self, index: usize) -> Self::Item {
        GenericListArray::value(self, index)
    }

    // NOTE(review): this also forwards to the bounds-checked `value` rather
    // than the inherent `value_unchecked`, so the offset lookups are still
    // checked here — confirm this is intentional.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        GenericListArray::value(self, index)
    }
}
658
659impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
660    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
661        let prefix = OffsetSize::PREFIX;
662
663        write!(f, "{prefix}ListArray\n[\n")?;
664        print_long_array(self, f, |array, index, f| {
665            std::fmt::Debug::fmt(&array.value(index), f)
666        })?;
667        write!(f, "]")
668    }
669}
670
/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
///
/// Its data type is [`DataType::List`].
///
/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
pub type ListArray = GenericListArray<i32>;
675
/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
///
/// Its data type is [`DataType::LargeList`].
///
/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
pub type LargeListArray = GenericListArray<i64>;
680
681#[cfg(test)]
682mod tests {
683    use super::*;
684    use crate::builder::{
685        BooleanBuilder, FixedSizeListBuilder, Int32Builder, ListBuilder, StringBuilder,
686        StringDictionaryBuilder, UnionBuilder,
687    };
688    use crate::cast::AsArray;
689    use crate::types::{Int8Type, Int32Type};
690    use crate::{
691        BooleanArray, Int8Array, Int8DictionaryArray, Int32Array, Int64Array, StringArray,
692    };
693    use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
694    use arrow_schema::Field;
695
696    fn create_from_buffers() -> ListArray {
697        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
698        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
699        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
700        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
701        ListArray::new(field, offsets, Arc::new(values), None)
702    }
703
704    #[test]
705    fn test_from_iter_primitive() {
706        let data = vec![
707            Some(vec![Some(0), Some(1), Some(2)]),
708            Some(vec![Some(3), Some(4), Some(5)]),
709            Some(vec![Some(6), Some(7)]),
710        ];
711        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
712
713        let another = create_from_buffers();
714        assert_eq!(list_array, another)
715    }
716
717    #[test]
718    fn test_empty_list_array() {
719        // Construct an empty value array
720        let value_data = ArrayData::builder(DataType::Int32)
721            .len(0)
722            .add_buffer(Buffer::from([]))
723            .build()
724            .unwrap();
725
726        // Construct an empty offset buffer
727        let value_offsets = Buffer::from([]);
728
729        // Construct a list array from the above two
730        let list_data_type =
731            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
732        let list_data = ArrayData::builder(list_data_type)
733            .len(0)
734            .add_buffer(value_offsets)
735            .add_child_data(value_data)
736            .build()
737            .unwrap();
738
739        let list_array = ListArray::from(list_data);
740        assert_eq!(list_array.len(), 0)
741    }
742
743    #[test]
744    fn test_list_array() {
745        // Construct a value array
746        let value_data = ArrayData::builder(DataType::Int32)
747            .len(8)
748            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
749            .build()
750            .unwrap();
751
752        // Construct a buffer for value offsets, for the nested array:
753        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
754        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
755
756        // Construct a list array from the above two
757        let list_data_type =
758            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
759        let list_data = ArrayData::builder(list_data_type.clone())
760            .len(3)
761            .add_buffer(value_offsets.clone())
762            .add_child_data(value_data.clone())
763            .build()
764            .unwrap();
765        let list_array = ListArray::from(list_data);
766
767        let values = list_array.values();
768        assert_eq!(value_data, values.to_data());
769        assert_eq!(DataType::Int32, list_array.value_type());
770        assert_eq!(3, list_array.len());
771        assert_eq!(0, list_array.null_count());
772        assert_eq!(6, list_array.value_offsets()[2]);
773        assert_eq!(2, list_array.value_length(2));
774        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
775        assert_eq!(
776            0,
777            unsafe { list_array.value_unchecked(0) }
778                .as_primitive::<Int32Type>()
779                .value(0)
780        );
781        for i in 0..3 {
782            assert!(list_array.is_valid(i));
783            assert!(!list_array.is_null(i));
784        }
785
786        // Now test with a non-zero offset (skip first element)
787        //  [[3, 4, 5], [6, 7]]
788        let list_data = ArrayData::builder(list_data_type)
789            .len(2)
790            .offset(1)
791            .add_buffer(value_offsets)
792            .add_child_data(value_data.clone())
793            .build()
794            .unwrap();
795        let list_array = ListArray::from(list_data);
796
797        let values = list_array.values();
798        assert_eq!(value_data, values.to_data());
799        assert_eq!(DataType::Int32, list_array.value_type());
800        assert_eq!(2, list_array.len());
801        assert_eq!(0, list_array.null_count());
802        assert_eq!(6, list_array.value_offsets()[1]);
803        assert_eq!(2, list_array.value_length(1));
804        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
805        assert_eq!(
806            3,
807            unsafe { list_array.value_unchecked(0) }
808                .as_primitive::<Int32Type>()
809                .value(0)
810        );
811    }
812
813    #[test]
814    fn test_large_list_array() {
815        // Construct a value array
816        let value_data = ArrayData::builder(DataType::Int32)
817            .len(8)
818            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
819            .build()
820            .unwrap();
821
822        // Construct a buffer for value offsets, for the nested array:
823        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
824        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
825
826        // Construct a list array from the above two
827        let list_data_type = DataType::new_large_list(DataType::Int32, false);
828        let list_data = ArrayData::builder(list_data_type.clone())
829            .len(3)
830            .add_buffer(value_offsets.clone())
831            .add_child_data(value_data.clone())
832            .build()
833            .unwrap();
834        let list_array = LargeListArray::from(list_data);
835
836        let values = list_array.values();
837        assert_eq!(value_data, values.to_data());
838        assert_eq!(DataType::Int32, list_array.value_type());
839        assert_eq!(3, list_array.len());
840        assert_eq!(0, list_array.null_count());
841        assert_eq!(6, list_array.value_offsets()[2]);
842        assert_eq!(2, list_array.value_length(2));
843        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
844        assert_eq!(
845            0,
846            unsafe { list_array.value_unchecked(0) }
847                .as_primitive::<Int32Type>()
848                .value(0)
849        );
850        for i in 0..3 {
851            assert!(list_array.is_valid(i));
852            assert!(!list_array.is_null(i));
853        }
854
855        // Now test with a non-zero offset
856        //  [[3, 4, 5], [6, 7]]
857        let list_data = ArrayData::builder(list_data_type)
858            .len(2)
859            .offset(1)
860            .add_buffer(value_offsets)
861            .add_child_data(value_data.clone())
862            .build()
863            .unwrap();
864        let list_array = LargeListArray::from(list_data);
865
866        let values = list_array.values();
867        assert_eq!(value_data, values.to_data());
868        assert_eq!(DataType::Int32, list_array.value_type());
869        assert_eq!(2, list_array.len());
870        assert_eq!(0, list_array.null_count());
871        assert_eq!(6, list_array.value_offsets()[1]);
872        assert_eq!(2, list_array.value_length(1));
873        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
874        assert_eq!(
875            3,
876            unsafe { list_array.value_unchecked(0) }
877                .as_primitive::<Int32Type>()
878                .value(0)
879        );
880    }
881
882    #[test]
883    fn test_list_array_slice() {
884        // Construct a value array
885        let value_data = ArrayData::builder(DataType::Int32)
886            .len(10)
887            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
888            .build()
889            .unwrap();
890
891        // Construct a buffer for value offsets, for the nested array:
892        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
893        let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
894        // 01011001 00000001
895        let mut null_bits: [u8; 2] = [0; 2];
896        bit_util::set_bit(&mut null_bits, 0);
897        bit_util::set_bit(&mut null_bits, 3);
898        bit_util::set_bit(&mut null_bits, 4);
899        bit_util::set_bit(&mut null_bits, 6);
900        bit_util::set_bit(&mut null_bits, 8);
901
902        // Construct a list array from the above two
903        let list_data_type =
904            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
905        let list_data = ArrayData::builder(list_data_type)
906            .len(9)
907            .add_buffer(value_offsets)
908            .add_child_data(value_data.clone())
909            .null_bit_buffer(Some(Buffer::from(null_bits)))
910            .build()
911            .unwrap();
912        let list_array = ListArray::from(list_data);
913
914        let values = list_array.values();
915        assert_eq!(value_data, values.to_data());
916        assert_eq!(DataType::Int32, list_array.value_type());
917        assert_eq!(9, list_array.len());
918        assert_eq!(4, list_array.null_count());
919        assert_eq!(2, list_array.value_offsets()[3]);
920        assert_eq!(2, list_array.value_length(3));
921
922        let sliced_array = list_array.slice(1, 6);
923        assert_eq!(6, sliced_array.len());
924        assert_eq!(3, sliced_array.null_count());
925
926        for i in 0..sliced_array.len() {
927            if bit_util::get_bit(&null_bits, 1 + i) {
928                assert!(sliced_array.is_valid(i));
929            } else {
930                assert!(sliced_array.is_null(i));
931            }
932        }
933
934        // Check offset and length for each non-null value.
935        let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
936        assert_eq!(2, sliced_list_array.value_offsets()[2]);
937        assert_eq!(2, sliced_list_array.value_length(2));
938        assert_eq!(4, sliced_list_array.value_offsets()[3]);
939        assert_eq!(2, sliced_list_array.value_length(3));
940        assert_eq!(6, sliced_list_array.value_offsets()[5]);
941        assert_eq!(3, sliced_list_array.value_length(5));
942    }
943
944    #[test]
945    fn test_large_list_array_slice() {
946        // Construct a value array
947        let value_data = ArrayData::builder(DataType::Int32)
948            .len(10)
949            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
950            .build()
951            .unwrap();
952
953        // Construct a buffer for value offsets, for the nested array:
954        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
955        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
956        // 01011001 00000001
957        let mut null_bits: [u8; 2] = [0; 2];
958        bit_util::set_bit(&mut null_bits, 0);
959        bit_util::set_bit(&mut null_bits, 3);
960        bit_util::set_bit(&mut null_bits, 4);
961        bit_util::set_bit(&mut null_bits, 6);
962        bit_util::set_bit(&mut null_bits, 8);
963
964        // Construct a list array from the above two
965        let list_data_type = DataType::new_large_list(DataType::Int32, false);
966        let list_data = ArrayData::builder(list_data_type)
967            .len(9)
968            .add_buffer(value_offsets)
969            .add_child_data(value_data.clone())
970            .null_bit_buffer(Some(Buffer::from(null_bits)))
971            .build()
972            .unwrap();
973        let list_array = LargeListArray::from(list_data);
974
975        let values = list_array.values();
976        assert_eq!(value_data, values.to_data());
977        assert_eq!(DataType::Int32, list_array.value_type());
978        assert_eq!(9, list_array.len());
979        assert_eq!(4, list_array.null_count());
980        assert_eq!(2, list_array.value_offsets()[3]);
981        assert_eq!(2, list_array.value_length(3));
982
983        let sliced_array = list_array.slice(1, 6);
984        assert_eq!(6, sliced_array.len());
985        assert_eq!(3, sliced_array.null_count());
986
987        for i in 0..sliced_array.len() {
988            if bit_util::get_bit(&null_bits, 1 + i) {
989                assert!(sliced_array.is_valid(i));
990            } else {
991                assert!(sliced_array.is_null(i));
992            }
993        }
994
995        // Check offset and length for each non-null value.
996        let sliced_list_array = sliced_array
997            .as_any()
998            .downcast_ref::<LargeListArray>()
999            .unwrap();
1000        assert_eq!(2, sliced_list_array.value_offsets()[2]);
1001        assert_eq!(2, sliced_list_array.value_length(2));
1002        assert_eq!(4, sliced_list_array.value_offsets()[3]);
1003        assert_eq!(2, sliced_list_array.value_length(3));
1004        assert_eq!(6, sliced_list_array.value_offsets()[5]);
1005        assert_eq!(3, sliced_list_array.value_length(5));
1006    }
1007
1008    #[test]
1009    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
1010    fn test_list_array_index_out_of_bound() {
1011        // Construct a value array
1012        let value_data = ArrayData::builder(DataType::Int32)
1013            .len(10)
1014            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
1015            .build()
1016            .unwrap();
1017
1018        // Construct a buffer for value offsets, for the nested array:
1019        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
1020        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
1021        // 01011001 00000001
1022        let mut null_bits: [u8; 2] = [0; 2];
1023        bit_util::set_bit(&mut null_bits, 0);
1024        bit_util::set_bit(&mut null_bits, 3);
1025        bit_util::set_bit(&mut null_bits, 4);
1026        bit_util::set_bit(&mut null_bits, 6);
1027        bit_util::set_bit(&mut null_bits, 8);
1028
1029        // Construct a list array from the above two
1030        let list_data_type = DataType::new_large_list(DataType::Int32, false);
1031        let list_data = ArrayData::builder(list_data_type)
1032            .len(9)
1033            .add_buffer(value_offsets)
1034            .add_child_data(value_data)
1035            .null_bit_buffer(Some(Buffer::from(null_bits)))
1036            .build()
1037            .unwrap();
1038        let list_array = LargeListArray::from(list_data);
1039        assert_eq!(9, list_array.len());
1040
1041        list_array.value(10);
1042    }
1043    #[test]
1044    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
1045    // Different error messages, so skip for now
1046    // https://github.com/apache/arrow-rs/issues/1545
1047    #[cfg(not(feature = "force_validate"))]
1048    fn test_list_array_invalid_buffer_len() {
1049        let value_data = unsafe {
1050            ArrayData::builder(DataType::Int32)
1051                .len(8)
1052                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1053                .build_unchecked()
1054        };
1055        let list_data_type =
1056            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1057        let list_data = unsafe {
1058            ArrayData::builder(list_data_type)
1059                .len(3)
1060                .add_child_data(value_data)
1061                .build_unchecked()
1062        };
1063        drop(ListArray::from(list_data));
1064    }
1065
1066    #[test]
1067    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1068    // Different error messages, so skip for now
1069    // https://github.com/apache/arrow-rs/issues/1545
1070    #[cfg(not(feature = "force_validate"))]
1071    fn test_list_array_invalid_child_array_len() {
1072        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1073        let list_data_type =
1074            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1075        let list_data = unsafe {
1076            ArrayData::builder(list_data_type)
1077                .len(3)
1078                .add_buffer(value_offsets)
1079                .build_unchecked()
1080        };
1081        drop(ListArray::from(list_data));
1082    }
1083
1084    #[test]
1085    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1086    fn test_from_array_data_validation() {
1087        let mut builder = ListBuilder::new(Int32Builder::new());
1088        builder.values().append_value(1);
1089        builder.append(true);
1090        let array = builder.finish();
1091        let _ = LargeListArray::from(array.into_data());
1092    }
1093
1094    #[test]
1095    fn test_list_array_offsets_need_not_start_at_zero() {
1096        let value_data = ArrayData::builder(DataType::Int32)
1097            .len(8)
1098            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1099            .build()
1100            .unwrap();
1101
1102        let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1103
1104        let list_data_type =
1105            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1106        let list_data = ArrayData::builder(list_data_type)
1107            .len(3)
1108            .add_buffer(value_offsets)
1109            .add_child_data(value_data)
1110            .build()
1111            .unwrap();
1112
1113        let list_array = ListArray::from(list_data);
1114        assert_eq!(list_array.value_length(0), 0);
1115        assert_eq!(list_array.value_length(1), 3);
1116        assert_eq!(list_array.value_length(2), 2);
1117    }
1118
1119    #[test]
1120    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1121    // Different error messages, so skip for now
1122    // https://github.com/apache/arrow-rs/issues/1545
1123    #[cfg(not(feature = "force_validate"))]
1124    fn test_primitive_array_alignment() {
1125        let buf = Buffer::from_slice_ref([0_u64]);
1126        let buf2 = buf.slice(1);
1127        let array_data = unsafe {
1128            ArrayData::builder(DataType::Int32)
1129                .add_buffer(buf2)
1130                .build_unchecked()
1131        };
1132        drop(Int32Array::from(array_data));
1133    }
1134
1135    #[test]
1136    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1137    // Different error messages, so skip for now
1138    // https://github.com/apache/arrow-rs/issues/1545
1139    #[cfg(not(feature = "force_validate"))]
1140    fn test_list_array_alignment() {
1141        let buf = Buffer::from_slice_ref([0_u64]);
1142        let buf2 = buf.slice(1);
1143
1144        let values: [i32; 8] = [0; 8];
1145        let value_data = unsafe {
1146            ArrayData::builder(DataType::Int32)
1147                .add_buffer(Buffer::from_slice_ref(values))
1148                .build_unchecked()
1149        };
1150
1151        let list_data_type =
1152            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1153        let list_data = unsafe {
1154            ArrayData::builder(list_data_type)
1155                .add_buffer(buf2)
1156                .add_child_data(value_data)
1157                .build_unchecked()
1158        };
1159        drop(ListArray::from(list_data));
1160    }
1161
1162    #[test]
1163    fn list_array_equality() {
1164        // test scaffold
1165        fn do_comparison(
1166            lhs_data: Vec<Option<Vec<Option<i32>>>>,
1167            rhs_data: Vec<Option<Vec<Option<i32>>>>,
1168            should_equal: bool,
1169        ) {
1170            let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1171            let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1172            assert_eq!(lhs == rhs, should_equal);
1173
1174            let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1175            let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1176            assert_eq!(lhs == rhs, should_equal);
1177        }
1178
1179        do_comparison(
1180            vec![
1181                Some(vec![Some(0), Some(1), Some(2)]),
1182                None,
1183                Some(vec![Some(3), None, Some(5)]),
1184                Some(vec![Some(6), Some(7)]),
1185            ],
1186            vec![
1187                Some(vec![Some(0), Some(1), Some(2)]),
1188                None,
1189                Some(vec![Some(3), None, Some(5)]),
1190                Some(vec![Some(6), Some(7)]),
1191            ],
1192            true,
1193        );
1194
1195        do_comparison(
1196            vec![
1197                None,
1198                None,
1199                Some(vec![Some(3), None, Some(5)]),
1200                Some(vec![Some(6), Some(7)]),
1201            ],
1202            vec![
1203                Some(vec![Some(0), Some(1), Some(2)]),
1204                None,
1205                Some(vec![Some(3), None, Some(5)]),
1206                Some(vec![Some(6), Some(7)]),
1207            ],
1208            false,
1209        );
1210
1211        do_comparison(
1212            vec![
1213                None,
1214                None,
1215                Some(vec![Some(3), None, Some(5)]),
1216                Some(vec![Some(6), Some(7)]),
1217            ],
1218            vec![
1219                None,
1220                None,
1221                Some(vec![Some(3), None, Some(5)]),
1222                Some(vec![Some(0), Some(0)]),
1223            ],
1224            false,
1225        );
1226
1227        do_comparison(
1228            vec![None, None, Some(vec![Some(1)])],
1229            vec![None, None, Some(vec![Some(2)])],
1230            false,
1231        );
1232    }
1233
1234    #[test]
1235    fn test_empty_offsets() {
1236        let f = Arc::new(Field::new("element", DataType::Int32, true));
1237        let string = ListArray::from(
1238            ArrayData::builder(DataType::List(f.clone()))
1239                .buffers(vec![Buffer::from(&[])])
1240                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1241                .build()
1242                .unwrap(),
1243        );
1244        assert_eq!(string.value_offsets(), &[0]);
1245        let string = LargeListArray::from(
1246            ArrayData::builder(DataType::LargeList(f))
1247                .buffers(vec![Buffer::from(&[])])
1248                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1249                .build()
1250                .unwrap(),
1251        );
1252        assert_eq!(string.len(), 0);
1253        assert_eq!(string.value_offsets(), &[0]);
1254    }
1255
1256    #[test]
1257    fn test_try_new() {
1258        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1259        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1260        let values = Arc::new(values) as ArrayRef;
1261
1262        let field = Arc::new(Field::new("element", DataType::Int32, false));
1263        ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1264
1265        let nulls = NullBuffer::new_null(3);
1266        ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1267
1268        let nulls = NullBuffer::new_null(3);
1269        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1270        let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1271            .unwrap_err();
1272
1273        assert_eq!(
1274            err.to_string(),
1275            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1276        );
1277
1278        let field = Arc::new(Field::new("element", DataType::Int64, false));
1279        let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1280            .unwrap_err();
1281
1282        assert_eq!(
1283            err.to_string(),
1284            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1285        );
1286
1287        let nulls = NullBuffer::new_null(7);
1288        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1289        let values = Arc::new(values);
1290
1291        let err =
1292            LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1293
1294        assert_eq!(
1295            err.to_string(),
1296            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1297        );
1298
1299        let field = Arc::new(Field::new("element", DataType::Int64, true));
1300        LargeListArray::new(field.clone(), offsets.clone(), values, None);
1301
1302        let values = Int64Array::new(vec![0; 2].into(), None);
1303        let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1304
1305        assert_eq!(
1306            err.to_string(),
1307            "Invalid argument error: Max offset of 5 exceeds length of values 2"
1308        );
1309    }
1310
1311    #[test]
1312    fn test_from_fixed_size_list() {
1313        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1314        builder.values().append_slice(&[1, 2, 3]);
1315        builder.append(true);
1316        builder.values().append_slice(&[0, 0, 0]);
1317        builder.append(false);
1318        builder.values().append_slice(&[4, 5, 6]);
1319        builder.append(true);
1320        let list: ListArray = builder.finish().into();
1321
1322        let values: Vec<_> = list
1323            .iter()
1324            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1325            .collect();
1326        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1327    }
1328
1329    #[test]
1330    fn test_nullable_union() {
1331        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1332        let mut builder = UnionBuilder::new_dense();
1333        builder.append::<Int32Type>("a", 1).unwrap();
1334        builder.append::<Int32Type>("b", 2).unwrap();
1335        builder.append::<Int32Type>("b", 3).unwrap();
1336        builder.append::<Int32Type>("a", 4).unwrap();
1337        builder.append::<Int32Type>("a", 5).unwrap();
1338        let values = builder.build().unwrap();
1339        let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1340        ListArray::new(field.clone(), offsets, Arc::new(values), None);
1341    }
1342
1343    #[test]
1344    fn test_list_new_null_len() {
1345        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1346        let array = ListArray::new_null(field, 5);
1347        assert_eq!(array.len(), 5);
1348    }
1349
1350    #[test]
1351    fn test_list_from_iter_i32() {
1352        let array = ListArray::from_nested_iter::<Int32Builder, _, _, _>(vec![
1353            None,
1354            Some(vec![Some(1), None, Some(2)]),
1355        ]);
1356        let expected_offsets = &[0, 0, 3];
1357        let expected_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(2)]));
1358        assert_eq!(array.value_offsets(), expected_offsets);
1359        assert_eq!(array.values(), &expected_values);
1360    }
1361
1362    #[test]
1363    fn test_list_from_iter_bool() {
1364        let array = ListArray::from_nested_iter::<BooleanBuilder, _, _, _>(vec![
1365            Some(vec![None, Some(false), Some(true)]),
1366            None,
1367        ]);
1368        let expected_offsets = &[0, 3, 3];
1369        let expected_values: ArrayRef =
1370            Arc::new(BooleanArray::from(vec![None, Some(false), Some(true)]));
1371        assert_eq!(array.value_offsets(), expected_offsets);
1372        assert_eq!(array.values(), &expected_values);
1373    }
1374
1375    #[test]
1376    fn test_list_from_iter_str() {
1377        let array = ListArray::from_nested_iter::<StringBuilder, _, _, _>(vec![
1378            Some(vec![Some("foo"), None, Some("bar")]),
1379            None,
1380        ]);
1381        let expected_offsets = &[0, 3, 3];
1382        let expected_values: ArrayRef =
1383            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")]));
1384        assert_eq!(array.value_offsets(), expected_offsets);
1385        assert_eq!(array.values(), &expected_values);
1386    }
1387
1388    #[test]
1389    fn test_list_from_iter_dict_str() {
1390        let array =
1391            ListArray::from_nested_iter::<StringDictionaryBuilder<Int8Type>, _, _, _>(vec![
1392                Some(vec![Some("foo"), None, Some("bar"), Some("foo")]),
1393                None,
1394            ]);
1395        let expected_offsets = &[0, 4, 4];
1396        let expected_dict_values: ArrayRef =
1397            Arc::new(StringArray::from(vec![Some("foo"), Some("bar")]));
1398        let expected_dict_keys = Int8Array::from(vec![Some(0), None, Some(1), Some(0)]);
1399        let expected_values: ArrayRef = Arc::new(
1400            Int8DictionaryArray::try_new(expected_dict_keys, expected_dict_values).unwrap(),
1401        );
1402        assert_eq!(array.value_offsets(), expected_offsets);
1403        assert_eq!(array.values(), &expected_values);
1404    }
1405}