arrow_array/builder/
fixed_size_list_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::{ArrayRef, FixedSizeListArray};
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Field, FieldRef};
22use std::any::Any;
23use std::sync::Arc;
24
25///  Builder for [`FixedSizeListArray`]
26/// ```
27/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array, Int32Array};
28/// let values_builder = Int32Builder::new();
29/// let mut builder = FixedSizeListBuilder::new(values_builder, 3);
30///
31/// //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
32/// builder.values().append_value(0);
33/// builder.values().append_value(1);
34/// builder.values().append_value(2);
35/// builder.append(true);
36/// builder.values().append_null();
37/// builder.values().append_null();
38/// builder.values().append_null();
39/// builder.append(false);
40/// builder.values().append_value(3);
41/// builder.values().append_null();
42/// builder.values().append_value(5);
43/// builder.append(true);
44/// builder.values().append_value(6);
45/// builder.values().append_value(7);
46/// builder.values().append_null();
47/// builder.append(true);
48/// let list_array = builder.finish();
49/// assert_eq!(
50///     *list_array.value(0),
51///     Int32Array::from(vec![Some(0), Some(1), Some(2)])
52/// );
53/// assert!(list_array.is_null(1));
54/// assert_eq!(
55///     *list_array.value(2),
56///     Int32Array::from(vec![Some(3), None, Some(5)])
57/// );
58/// assert_eq!(
59///     *list_array.value(3),
60///     Int32Array::from(vec![Some(6), Some(7), None])
61/// )
62/// ```
63///
64#[derive(Debug)]
65pub struct FixedSizeListBuilder<T: ArrayBuilder> {
66    null_buffer_builder: NullBufferBuilder,
67    values_builder: T,
68    list_len: i32,
69    field: Option<FieldRef>,
70}
71
72impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
73    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
74    /// `value_length` is the number of values within each array
75    pub fn new(values_builder: T, value_length: i32) -> Self {
76        let capacity = values_builder
77            .len()
78            .checked_div(value_length as _)
79            .unwrap_or_default();
80
81        Self::with_capacity(values_builder, value_length, capacity)
82    }
83
84    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
85    /// `value_length` is the number of values within each array
86    /// `capacity` is the number of items to pre-allocate space for in this builder
87    pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
88        Self {
89            null_buffer_builder: NullBufferBuilder::new(capacity),
90            values_builder,
91            list_len: value_length,
92            field: None,
93        }
94    }
95
96    /// Override the field passed to [`FixedSizeListArray::new`]
97    ///
98    /// By default, a nullable field is created with the name `item`
99    ///
100    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
101    /// field's data type does not match that of `T`
102    pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
103        Self {
104            field: Some(field.into()),
105            ..self
106        }
107    }
108}
109
110impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
111where
112    T: 'static,
113{
114    /// Returns the builder as a non-mutable `Any` reference.
115    fn as_any(&self) -> &dyn Any {
116        self
117    }
118
119    /// Returns the builder as a mutable `Any` reference.
120    fn as_any_mut(&mut self) -> &mut dyn Any {
121        self
122    }
123
124    /// Returns the boxed builder as a box of `Any`.
125    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
126        self
127    }
128
129    /// Returns the number of array slots in the builder
130    fn len(&self) -> usize {
131        self.null_buffer_builder.len()
132    }
133
134    /// Builds the array and reset this builder.
135    fn finish(&mut self) -> ArrayRef {
136        Arc::new(self.finish())
137    }
138
139    /// Builds the array without resetting the builder.
140    fn finish_cloned(&self) -> ArrayRef {
141        Arc::new(self.finish_cloned())
142    }
143}
144
145impl<T: ArrayBuilder> FixedSizeListBuilder<T>
146where
147    T: 'static,
148{
149    /// Returns the child array builder as a mutable reference.
150    ///
151    /// This mutable reference can be used to append values into the child array builder,
152    /// but you must call [`append`](#method.append) to delimit each distinct list value.
153    pub fn values(&mut self) -> &mut T {
154        &mut self.values_builder
155    }
156
157    /// Returns the length of the list
158    pub fn value_length(&self) -> i32 {
159        self.list_len
160    }
161
162    /// Finish the current fixed-length list array slot
163    #[inline]
164    pub fn append(&mut self, is_valid: bool) {
165        self.null_buffer_builder.append(is_valid);
166    }
167
168    /// Builds the [`FixedSizeListBuilder`] and reset this builder.
169    pub fn finish(&mut self) -> FixedSizeListArray {
170        let len = self.len();
171        let values = self.values_builder.finish();
172        let nulls = self.null_buffer_builder.finish();
173
174        assert_eq!(
175            values.len(),
176            len * self.list_len as usize,
177            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
178            values.len(),
179            self.list_len,
180            len,
181        );
182
183        let field = self
184            .field
185            .clone()
186            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
187
188        FixedSizeListArray::new(field, self.list_len, values, nulls)
189    }
190
191    /// Builds the [`FixedSizeListBuilder`] without resetting the builder.
192    pub fn finish_cloned(&self) -> FixedSizeListArray {
193        let len = self.len();
194        let values = self.values_builder.finish_cloned();
195        let nulls = self.null_buffer_builder.finish_cloned();
196
197        assert_eq!(
198            values.len(),
199            len * self.list_len as usize,
200            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
201            values.len(),
202            self.list_len,
203            len,
204        );
205
206        let field = self
207            .field
208            .clone()
209            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
210
211        FixedSizeListArray::new(field, self.list_len, values, nulls)
212    }
213
214    /// Returns the current null buffer as a slice
215    pub fn validity_slice(&self) -> Option<&[u8]> {
216        self.null_buffer_builder.as_slice()
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::DataType;

    use crate::Array;
    use crate::Int32Array;
    use crate::builder::Int32Builder;

    /// Builds a four-slot builder of length-3 `Int32` lists.
    ///
    /// * `include_null_element` makes the third list null (backed by null child
    ///   values) instead of `[2, 3, 4]`.
    /// * `include_null_in_values` makes the fourth list `[3, null, 5]` instead
    ///   of `[3, 4, 5]`.
    fn make_list_builder(
        include_null_element: bool,
        include_null_in_values: bool,
    ) -> FixedSizeListBuilder<Int32Builder> {
        let values_builder = Int32Builder::new();
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_value(2);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.append(true);

        if include_null_element {
            builder.values().append_null();
            builder.values().append_null();
            builder.values().append_null();
            builder.append(false);
        } else {
            builder.values().append_value(2);
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.append(true);
        }

        if include_null_in_values {
            builder.values().append_value(3);
            builder.values().append_null();
            builder.values().append_value(5);
            builder.append(true);
        } else {
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.values().append_value(5);
            builder.append(true);
        }

        builder
    }

    #[test]
    fn test_fixed_size_list_array_builder() {
        let mut builder = make_list_builder(true, true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field() {
        let builder = make_list_builder(false, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish();
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_with_field_type_panic() {
        let values_builder = Int32Builder::new();
        let builder = FixedSizeListBuilder::new(values_builder, 3);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        //  [[0, 1, 2], null, [3, 4, 5]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);

        builder.finish();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));

        let list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        // Exercise the non-resetting path: the child nulls are masked by the
        // null list slot, so a non-nullable field must be accepted here too.
        let list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
        let builder = make_list_builder(false, false);
        let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        builder.finish_cloned();
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish_cloned() {
        let mut builder = make_list_builder(true, true);

        let mut list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(3, list_array.value_length());

        // `finish_cloned` must leave the builder intact, so further appends
        // extend the same four lists.
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.values().append_null();
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(6, list_array.len());
        assert_eq!(2, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    fn test_fixed_size_list_array_builder_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish_cloned();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_empty() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_fixed_size_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert_eq!(0, builder.len());

        // The builder is reusable after `finish` and starts again from empty.
        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    #[should_panic(
        expected = "Length of the child array (10) must be the multiple of the value length (3) and the array length (3)."
    )]
    fn test_fixed_size_list_array_builder_fail() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);
        // Four child values for a length-3 list: the total no longer matches.
        builder.values().append_slice(&[7, 8, 9, 10]);
        builder.append(true);

        builder.finish();
    }
}