arrow_array/builder/
boolean_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
19use crate::{Array, ArrayRef, BooleanArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::NullBufferBuilder;
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`BooleanArray`]
28///
29/// # Example
30///
31/// Create a `BooleanArray` from a `BooleanBuilder`
32///
33/// ```
34///
35/// # use arrow_array::{Array, BooleanArray, builder::BooleanBuilder};
36///
37/// let mut b = BooleanBuilder::new();
38/// b.append_value(true);
39/// b.append_null();
40/// b.append_value(false);
41/// b.append_value(true);
42/// let arr = b.finish();
43///
44/// assert_eq!(4, arr.len());
45/// assert_eq!(1, arr.null_count());
46/// assert_eq!(true, arr.value(0));
47/// assert!(arr.is_valid(0));
48/// assert!(!arr.is_null(0));
49/// assert!(!arr.is_valid(1));
50/// assert!(arr.is_null(1));
51/// assert_eq!(false, arr.value(2));
52/// assert!(arr.is_valid(2));
53/// assert!(!arr.is_null(2));
54/// assert_eq!(true, arr.value(3));
55/// assert!(arr.is_valid(3));
56/// assert!(!arr.is_null(3));
57/// ```
58#[derive(Debug)]
59pub struct BooleanBuilder {
60    values_builder: BooleanBufferBuilder,
61    null_buffer_builder: NullBufferBuilder,
62}
63
64impl Default for BooleanBuilder {
65    fn default() -> Self {
66        Self::new()
67    }
68}
69
70impl BooleanBuilder {
71    /// Creates a new boolean builder
72    pub fn new() -> Self {
73        Self::with_capacity(1024)
74    }
75
76    /// Creates a new boolean builder with space for `capacity` elements without re-allocating
77    pub fn with_capacity(capacity: usize) -> Self {
78        Self {
79            values_builder: BooleanBufferBuilder::new(capacity),
80            null_buffer_builder: NullBufferBuilder::new(capacity),
81        }
82    }
83
84    /// Returns the capacity of this builder measured in slots of type `T`
85    pub fn capacity(&self) -> usize {
86        self.values_builder.capacity()
87    }
88
89    /// Appends a value of type `T` into the builder
90    #[inline]
91    pub fn append_value(&mut self, v: bool) {
92        self.values_builder.append(v);
93        self.null_buffer_builder.append_non_null();
94    }
95
96    /// Appends a null slot into the builder
97    #[inline]
98    pub fn append_null(&mut self) {
99        self.null_buffer_builder.append_null();
100        self.values_builder.advance(1);
101    }
102
103    /// Appends `n` `null`s into the builder.
104    #[inline]
105    pub fn append_nulls(&mut self, n: usize) {
106        self.null_buffer_builder.append_n_nulls(n);
107        self.values_builder.advance(n);
108    }
109
110    /// Appends an `Option<T>` into the builder
111    #[inline]
112    pub fn append_option(&mut self, v: Option<bool>) {
113        match v {
114            None => self.append_null(),
115            Some(v) => self.append_value(v),
116        };
117    }
118
119    /// Appends a slice of type `T` into the builder
120    #[inline]
121    pub fn append_slice(&mut self, v: &[bool]) {
122        self.values_builder.append_slice(v);
123        self.null_buffer_builder.append_n_non_nulls(v.len());
124    }
125
126    /// Appends n `additional` bits of value `v` into the buffer
127    #[inline]
128    pub fn append_n(&mut self, additional: usize, v: bool) {
129        self.values_builder.append_n(additional, v);
130        self.null_buffer_builder.append_n_non_nulls(additional);
131    }
132
133    /// Appends values from a slice of type `T` and a validity boolean slice.
134    ///
135    /// Returns an error if the slices are of different lengths
136    #[inline]
137    pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<(), ArrowError> {
138        if values.len() != is_valid.len() {
139            Err(ArrowError::InvalidArgumentError(
140                "Value and validity lengths must be equal".to_string(),
141            ))
142        } else {
143            self.null_buffer_builder.append_slice(is_valid);
144            self.values_builder.append_slice(values);
145            Ok(())
146        }
147    }
148
149    /// Appends array values and null to this builder as is
150    /// (this means that underlying null values are copied as is).
151    #[inline]
152    pub fn append_array(&mut self, array: &BooleanArray) {
153        self.values_builder.append_buffer(array.values());
154        if let Some(null_buffer) = array.nulls() {
155            self.null_buffer_builder.append_buffer(null_buffer);
156        } else {
157            self.null_buffer_builder.append_n_non_nulls(array.len());
158        }
159    }
160
161    /// Builds the [BooleanArray] and reset this builder.
162    pub fn finish(&mut self) -> BooleanArray {
163        let len = self.len();
164        let null_bit_buffer = self.null_buffer_builder.finish();
165        let builder = ArrayData::builder(DataType::Boolean)
166            .len(len)
167            .add_buffer(self.values_builder.finish().into_inner())
168            .nulls(null_bit_buffer);
169
170        let array_data = unsafe { builder.build_unchecked() };
171        BooleanArray::from(array_data)
172    }
173
174    /// Builds the [BooleanArray] without resetting the builder.
175    pub fn finish_cloned(&self) -> BooleanArray {
176        let len = self.len();
177        let nulls = self.null_buffer_builder.finish_cloned();
178        let value_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
179        let builder = ArrayData::builder(DataType::Boolean)
180            .len(len)
181            .add_buffer(value_buffer)
182            .nulls(nulls);
183
184        let array_data = unsafe { builder.build_unchecked() };
185        BooleanArray::from(array_data)
186    }
187
188    /// Returns the current values buffer as a slice
189    ///
190    /// Boolean values are bit-packed into bytes. To extract the i-th boolean
191    /// from the bytes, you can use `arrow_buffer::bit_util::get_bit()`.
192    pub fn values_slice(&self) -> &[u8] {
193        self.values_builder.as_slice()
194    }
195
196    /// Returns the current null buffer as a slice
197    pub fn validity_slice(&self) -> Option<&[u8]> {
198        self.null_buffer_builder.as_slice()
199    }
200}
201
202impl ArrayBuilder for BooleanBuilder {
203    /// Returns the builder as a non-mutable `Any` reference.
204    fn as_any(&self) -> &dyn Any {
205        self
206    }
207
208    /// Returns the builder as a mutable `Any` reference.
209    fn as_any_mut(&mut self) -> &mut dyn Any {
210        self
211    }
212
213    /// Returns the boxed builder as a box of `Any`.
214    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
215        self
216    }
217
218    /// Returns the number of array slots in the builder
219    fn len(&self) -> usize {
220        self.values_builder.len()
221    }
222
223    /// Builds the array and reset this builder.
224    fn finish(&mut self) -> ArrayRef {
225        Arc::new(self.finish())
226    }
227
228    /// Builds the array without resetting the builder.
229    fn finish_cloned(&self) -> ArrayRef {
230        Arc::new(self.finish_cloned())
231    }
232}
233
234impl Extend<Option<bool>> for BooleanBuilder {
235    #[inline]
236    fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
237        for v in iter {
238            self.append_option(v)
239        }
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Array;
    use arrow_buffer::{BooleanBuffer, NullBuffer};

    /// Values appended one by one end up bit-packed in the expected bytes.
    #[test]
    fn test_boolean_array_builder() {
        // Slots 3, 6 and 9 are true: 00000010 01001000
        let expected_bits = Buffer::from([72_u8, 2_u8]);
        let is_set = |i: usize| matches!(i, 3 | 6 | 9);

        let mut builder = BooleanArray::builder(10);
        for i in 0..10 {
            builder.append_value(is_set(i));
        }
        let arr = builder.finish();

        assert_eq!(&expected_bits, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(is_set(i), arr.value(i), "failed at {i}")
        }
    }

    /// Mixing `append_slice`, `append_null` and `append_value` matches a `Vec`-built array.
    #[test]
    fn test_boolean_array_builder_append_slice() {
        let mut builder = BooleanArray::builder(0);
        builder.append_slice(&[true, false]);
        builder.append_null();
        builder.append_null();
        builder.append_value(false);
        let built = builder.finish();

        let expected = BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
        assert_eq!(expected, built);
    }

    /// Appending a slice longer than the initial capacity still works.
    #[test]
    fn test_boolean_array_builder_append_slice_large() {
        let mut builder = BooleanArray::builder(512);
        builder.append_slice(&[true; 513]);
        let built = builder.finish();

        let expected = BooleanArray::from(vec![true; 513]);
        assert_eq!(expected, built);
    }

    /// Appending only valid slots yields an array with no null buffer.
    #[test]
    fn test_boolean_array_builder_no_null() {
        let mut builder = BooleanArray::builder(0);
        builder.append_option(Some(true));
        builder.append_value(false);
        builder.append_slice(&[true, false, true]);
        let appended = builder.append_values(&[false, false, true], &[true, true, true]);
        appended.unwrap();

        let array = builder.finish();
        assert_eq!(0, array.null_count());
        assert!(array.nulls().is_none());
    }

    /// `finish_cloned` snapshots the builder without resetting it.
    #[test]
    fn test_boolean_array_builder_finish_cloned() {
        let mut builder = BooleanArray::builder(16);
        builder.append_option(Some(true));
        builder.append_value(false);
        builder.append_slice(&[true, false, true]);

        let snapshot = builder.finish_cloned();
        assert_eq!(3, snapshot.true_count());
        assert_eq!(2, snapshot.false_count());

        // The builder still holds the first five slots, so these are added on top.
        let appended = builder.append_values(&[false, false, true], &[true, true, true]);
        appended.unwrap();

        let array = builder.finish();
        assert_eq!(4, array.true_count());
        assert_eq!(4, array.false_count());

        assert_eq!(0, array.null_count());
        assert!(array.nulls().is_none());
    }

    /// Consecutive `extend` calls append in order.
    #[test]
    fn test_extend() {
        let mut builder = BooleanBuilder::new();
        builder.extend([false, false, true, false, false].into_iter().map(Some));
        builder.extend([true, true, false].into_iter().map(Some));
        let array = builder.finish();

        let values: Vec<bool> = array.iter().map(Option::unwrap).collect();
        assert_eq!(
            values,
            vec![false, false, true, false, false, true, true, false]
        )
    }

    /// `append_n` appends runs of identical non-null values.
    #[test]
    fn test_boolean_array_builder_append_n() {
        let mut builder = BooleanBuilder::new();
        builder.append_n(3, true);
        builder.append_n(2, false);
        let array = builder.finish();

        assert_eq!(3, array.true_count());
        assert_eq!(2, array.false_count());
        assert_eq!(0, array.null_count());

        let values: Vec<bool> = array.iter().map(Option::unwrap).collect();
        assert_eq!(values, vec![true, true, true, false, false])
    }

    /// Appending three arrays back to back equals one array built from the full input.
    #[test]
    fn test_append_array() {
        let input = vec![
            Some(true),
            None,
            Some(true),
            None,
            Some(false),
            None,
            None,
            None,
            Some(false),
            Some(false),
            Some(false),
            Some(true),
            Some(false),
        ];

        let mut builder = BooleanBuilder::new();
        // Mixed chunk, all-null chunk, all-valid chunk.
        let chunks = [&input[..5], &input[5..8], &input[8..]];
        for chunk in chunks {
            let part = BooleanArray::from(chunk.to_vec());
            builder.append_array(&part);
        }
        let actual = builder.finish();

        let expected = BooleanArray::from(input);
        assert_eq!(actual, expected);
    }

    /// Values stored under null slots are copied verbatim by `append_array`.
    #[test]
    fn test_append_array_add_underlying_null_values() {
        let values = BooleanBuffer::from(vec![true, false, true, false]);
        let nulls = NullBuffer::from(&[true, true, false, false]);
        let array = BooleanArray::new(values, Some(nulls));

        let mut builder = BooleanBuilder::new();
        builder.append_array(&array);
        let actual = builder.finish();

        assert_eq!(actual, array);
        assert_eq!(actual.values(), array.values())
    }
}