Skip to main content

arrow_buffer/builder/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_util::apply_bitwise_binary_op;
19use crate::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
20use std::ops::Range;
21
22/// Builder for [`BooleanBuffer`]
23///
24/// Builds a packed buffer of bits representing boolean values. Each bit in the
25/// buffer corresponds to a boolean value,
26///
27/// # See Also
28///
29/// * [`NullBufferBuilder`] for building [`BooleanBuffer`]s for representing nulls
30/// * [`BufferBuilder`] for building [`Buffer`]s
31///
32/// # Example
33/// ```
34/// # use arrow_buffer::builder::BooleanBufferBuilder;
35/// let mut builder = BooleanBufferBuilder::new(10);
36/// builder.append(true);
37/// builder.append(false);
38/// builder.append_n(3, true); // append 3 trues
39/// let buffer = builder.build();
40/// assert_eq!(buffer.len(), 5); // 5 bits appended
41/// assert_eq!(buffer.values(), &[0b00011101_u8]); // packed bits
42///```
43///
44/// [`BufferBuilder`]: crate::builder::BufferBuilder
45/// [`NullBufferBuilder`]: crate::builder::NullBufferBuilder
46#[derive(Debug)]
47pub struct BooleanBufferBuilder {
48    buffer: MutableBuffer,
49    len: usize,
50}
51
52impl BooleanBufferBuilder {
53    /// Creates a new `BooleanBufferBuilder` with sufficient space for
54    /// `capacity` bits (not bytes).
55    ///
56    /// The capacity is rounded up to the nearest multiple of 8 for the
57    /// allocation.
58    #[inline]
59    pub fn new(capacity: usize) -> Self {
60        let byte_capacity = bit_util::ceil(capacity, 8);
61        let buffer = MutableBuffer::new(byte_capacity);
62        Self { buffer, len: 0 }
63    }
64
65    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
66    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
67        assert!(len <= buffer.len() * 8);
68        let mut s = Self {
69            len: buffer.len() * 8,
70            buffer,
71        };
72        s.truncate(len);
73        s
74    }
75
76    /// Returns the length of the buffer
77    #[inline]
78    pub fn len(&self) -> usize {
79        self.len
80    }
81
82    /// Sets a bit in the buffer at `index`
83    #[inline]
84    pub fn set_bit(&mut self, index: usize, v: bool) {
85        if v {
86            bit_util::set_bit(self.buffer.as_mut(), index);
87        } else {
88            bit_util::unset_bit(self.buffer.as_mut(), index);
89        }
90    }
91
92    /// Gets a bit in the buffer at `index`
93    #[inline]
94    pub fn get_bit(&self, index: usize) -> bool {
95        bit_util::get_bit(self.buffer.as_slice(), index)
96    }
97
98    /// Returns true if empty
99    #[inline]
100    pub fn is_empty(&self) -> bool {
101        self.len == 0
102    }
103
104    /// Returns the capacity of the buffer, in bits (not bytes)
105    ///
106    /// Note this
107    ///
108    /// # Example
109    /// ```
110    /// # use arrow_buffer::builder::BooleanBufferBuilder;
111    /// // empty requires 0 bytes
112    /// let b = BooleanBufferBuilder::new(0);
113    /// assert_eq!(0, b.capacity());
114    /// // Creating space for 1 bit results in 64 bytes (space for 512 bits)
115    /// // (64 is the minimum allocation size for 64 bit architectures)
116    /// let mut b = BooleanBufferBuilder::new(1);
117    /// assert_eq!(512, b.capacity());
118    /// // 1000 bits requires 128 bytes (space for 1024 bits)
119    /// b.append_n(1000, true);
120    /// assert_eq!(1024, b.capacity());
121    /// ```
122    #[inline]
123    pub fn capacity(&self) -> usize {
124        self.buffer.capacity() * 8
125    }
126
127    /// Advances the buffer by `additional` bits
128    #[inline]
129    pub fn advance(&mut self, additional: usize) {
130        let new_len = self.len + additional;
131        let new_len_bytes = bit_util::ceil(new_len, 8);
132        if new_len_bytes > self.buffer.len() {
133            self.buffer.resize(new_len_bytes, 0);
134        }
135        self.len = new_len;
136    }
137
138    /// Truncates the builder to the given length
139    ///
140    /// If `len` is greater than the buffer's current length, this has no effect
141    #[inline]
142    pub fn truncate(&mut self, len: usize) {
143        if len > self.len {
144            return;
145        }
146
147        let new_len_bytes = bit_util::ceil(len, 8);
148        self.buffer.truncate(new_len_bytes);
149        self.len = len;
150
151        let remainder = self.len % 8;
152        if remainder != 0 {
153            let mask = (1_u8 << remainder).wrapping_sub(1);
154            *self.buffer.as_mut().last_mut().unwrap() &= mask;
155        }
156    }
157
158    /// Reserve space to at least `additional` new bits.
159    /// Capacity will be `>= self.len() + additional`.
160    #[inline]
161    pub fn reserve(&mut self, additional: usize) {
162        let capacity = self.len + additional;
163        if capacity > self.capacity() {
164            // convert differential to bytes
165            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
166            self.buffer.reserve(additional);
167        }
168    }
169
170    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
171    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
172    #[inline]
173    pub fn resize(&mut self, len: usize) {
174        match len.checked_sub(self.len) {
175            Some(delta) => self.advance(delta),
176            None => self.truncate(len),
177        }
178    }
179
180    /// Appends a boolean `v` into the buffer
181    #[inline]
182    pub fn append(&mut self, v: bool) {
183        self.advance(1);
184        if v {
185            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
186        }
187    }
188
189    /// Appends n `additional` bits of value `v` into the buffer
190    #[inline]
191    pub fn append_n(&mut self, additional: usize, v: bool) {
192        match v {
193            true => {
194                let new_len = self.len + additional;
195                let new_len_bytes = bit_util::ceil(new_len, 8);
196                let cur_remainder = self.len % 8;
197                let new_remainder = new_len % 8;
198
199                if cur_remainder != 0 {
200                    // Pad last byte with 1s
201                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
202                }
203                self.buffer.resize(new_len_bytes, 0xFF);
204                if new_remainder != 0 {
205                    // Clear remaining bits
206                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
207                }
208                self.len = new_len;
209            }
210            false => self.advance(additional),
211        }
212    }
213
214    /// Appends a slice of booleans into the buffer
215    #[inline]
216    pub fn append_slice(&mut self, slice: &[bool]) {
217        let additional = slice.len();
218        self.advance(additional);
219
220        let offset = self.len() - additional;
221        for (i, v) in slice.iter().enumerate() {
222            if *v {
223                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
224            }
225        }
226    }
227
228    /// Append `range` bits from `to_set`
229    ///
230    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
231    ///
232    /// # Panics
233    ///
234    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
235    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
236        let offset_write = self.len;
237        let len = range.end - range.start;
238        // allocate new bits as 0
239        self.advance(len);
240        // copy bits from to_set into self.buffer a word at a time
241        apply_bitwise_binary_op(
242            self.buffer.as_slice_mut(),
243            offset_write,
244            to_set,
245            range.start,
246            len,
247            |_a, b| b, // copy bits from to_set
248        );
249    }
250
251    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
252    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
253        let range = buffer.offset()..buffer.offset() + buffer.len();
254        self.append_packed_range(range, buffer.values())
255    }
256
257    /// Returns the packed bits
258    pub fn as_slice(&self) -> &[u8] {
259        self.buffer.as_slice()
260    }
261
262    /// Returns the packed bits
263    pub fn as_slice_mut(&mut self) -> &mut [u8] {
264        self.buffer.as_slice_mut()
265    }
266
267    /// Resets this builder and returns a [`BooleanBuffer`].
268    ///
269    /// Use [`Self::build`] when you don't need to reuse this builder.
270    #[inline]
271    pub fn finish(&mut self) -> BooleanBuffer {
272        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
273        let len = std::mem::replace(&mut self.len, 0);
274        BooleanBuffer::new(buf.into(), 0, len)
275    }
276
277    /// Builds a [`BooleanBuffer`] without resetting the builder.
278    ///
279    /// This consumes the builder. Use [`Self::finish`] to reuse it.
280    #[inline]
281    pub fn build(self) -> BooleanBuffer {
282        BooleanBuffer::new(self.buffer.into(), 0, self.len)
283    }
284
285    /// Builds the [BooleanBuffer] without resetting the builder.
286    pub fn finish_cloned(&self) -> BooleanBuffer {
287        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
288    }
289
290    /// Extends the builder from a trusted length iterator of booleans.
291    /// # Safety
292    /// Callers must ensure that `iter` reports an exact size via `size_hint`.
293    ///
294    #[inline]
295    pub unsafe fn extend_trusted_len<I>(&mut self, iterator: I)
296    where
297        I: Iterator<Item = bool>,
298    {
299        let len = iterator.size_hint().0;
300        unsafe { self.buffer.extend_bool_trusted_len(iterator, self.len) };
301        self.len += len;
302    }
303}
304
305impl From<BooleanBufferBuilder> for Buffer {
306    #[inline]
307    fn from(builder: BooleanBufferBuilder) -> Self {
308        builder.buffer.into()
309    }
310}
311
312impl From<BooleanBufferBuilder> for BooleanBuffer {
313    #[inline]
314    fn from(builder: BooleanBufferBuilder) -> Self {
315        builder.build()
316    }
317}
318
319impl From<BooleanBufferBuilder> for NullBuffer {
320    #[inline]
321    fn from(builder: BooleanBufferBuilder) -> Self {
322        let boolean_buffer = BooleanBuffer::from(builder);
323        NullBuffer::new(boolean_buffer)
324    }
325}
326
327#[cfg(test)]
328mod tests {
329    use super::*;
330
331    #[test]
332    fn test_boolean_buffer_builder_write_bytes() {
333        let mut b = BooleanBufferBuilder::new(4);
334        b.append(false);
335        b.append(true);
336        b.append(false);
337        b.append(true);
338        assert_eq!(4, b.len());
339        assert_eq!(512, b.capacity());
340        let buffer = b.finish();
341        assert_eq!(4, buffer.len());
342
343        // Overallocate capacity
344        let mut b = BooleanBufferBuilder::new(8);
345        b.append_slice(&[false, true, false, true]);
346        assert_eq!(4, b.len());
347        assert_eq!(512, b.capacity());
348        let buffer = b.finish();
349        assert_eq!(4, buffer.len());
350    }
351
352    #[test]
353    fn test_boolean_buffer_builder_unset_first_bit() {
354        let mut buffer = BooleanBufferBuilder::new(4);
355        buffer.append(true);
356        buffer.append(true);
357        buffer.append(false);
358        buffer.append(true);
359        buffer.set_bit(0, false);
360        assert_eq!(buffer.len(), 4);
361        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
362    }
363
364    #[test]
365    fn test_boolean_buffer_builder_unset_last_bit() {
366        let mut buffer = BooleanBufferBuilder::new(4);
367        buffer.append(true);
368        buffer.append(true);
369        buffer.append(false);
370        buffer.append(true);
371        buffer.set_bit(3, false);
372        assert_eq!(buffer.len(), 4);
373        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
374    }
375
376    #[test]
377    fn test_boolean_buffer_builder_unset_an_inner_bit() {
378        let mut buffer = BooleanBufferBuilder::new(5);
379        buffer.append(true);
380        buffer.append(true);
381        buffer.append(false);
382        buffer.append(true);
383        buffer.set_bit(1, false);
384        assert_eq!(buffer.len(), 4);
385        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
386    }
387
388    #[test]
389    fn test_boolean_buffer_builder_unset_several_bits() {
390        let mut buffer = BooleanBufferBuilder::new(5);
391        buffer.append(true);
392        buffer.append(true);
393        buffer.append(true);
394        buffer.append(false);
395        buffer.append(true);
396        buffer.set_bit(1, false);
397        buffer.set_bit(2, false);
398        assert_eq!(buffer.len(), 5);
399        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
400    }
401
402    #[test]
403    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
404        let mut buffer = BooleanBufferBuilder::new(16);
405        buffer.append_n(10, true);
406        buffer.set_bit(0, false);
407        buffer.set_bit(3, false);
408        buffer.set_bit(9, false);
409        assert_eq!(buffer.len(), 10);
410        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
411    }
412
413    #[test]
414    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
415        let mut buffer = BooleanBufferBuilder::new(16);
416        buffer.append_n(5, true);
417        buffer.append_n(5, false);
418        buffer.append_n(5, true);
419        buffer.set_bit(0, false);
420        buffer.set_bit(3, false);
421        buffer.set_bit(9, false);
422        buffer.set_bit(6, true);
423        buffer.set_bit(14, true);
424        buffer.set_bit(13, false);
425        assert_eq!(buffer.len(), 15);
426        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
427    }
428
429    #[test]
430    fn test_bool_buffer_builder_get_first_bit() {
431        let mut buffer = BooleanBufferBuilder::new(16);
432        buffer.append_n(8, true);
433        buffer.append_n(8, false);
434        assert!(buffer.get_bit(0));
435    }
436
437    #[test]
438    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
439        let buffer = {
440            let mut buffer = BooleanBufferBuilder::new(16);
441            buffer.append_n(8, true);
442            buffer
443        };
444
445        assert!(buffer.get_bit(0));
446    }
447
448    #[test]
449    fn test_bool_buffer_builder_get_last_bit() {
450        let mut buffer = BooleanBufferBuilder::new(16);
451        buffer.append_n(8, true);
452        buffer.append_n(8, false);
453        assert!(!buffer.get_bit(15));
454    }
455
456    #[test]
457    fn test_bool_buffer_builder_get_an_inner_bit() {
458        let mut buffer = BooleanBufferBuilder::new(16);
459        buffer.append_n(4, false);
460        buffer.append_n(8, true);
461        buffer.append_n(4, false);
462        assert!(buffer.get_bit(11));
463    }
464
465    #[test]
466    fn test_bool_buffer_fuzz() {
467        use rand::prelude::*;
468
469        let mut buffer = BooleanBufferBuilder::new(12);
470        let mut all_bools = vec![];
471        let mut rng = rand::rng();
472
473        let src_len = 32;
474        let (src, compacted_src) = {
475            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
476                .take(src_len)
477                .collect();
478
479            let mut compacted_src = BooleanBufferBuilder::new(src_len);
480            compacted_src.append_slice(&src);
481            (src, compacted_src.finish())
482        };
483
484        for _ in 0..100 {
485            let a = rng.next_u32() as usize % src_len;
486            let b = rng.next_u32() as usize % src_len;
487
488            let start = a.min(b);
489            let end = a.max(b);
490
491            buffer.append_packed_range(start..end, compacted_src.values());
492            all_bools.extend_from_slice(&src[start..end]);
493        }
494
495        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
496        compacted.append_slice(&all_bools);
497
498        assert_eq!(buffer.finish(), compacted.finish())
499    }
500
501    #[test]
502    fn test_boolean_array_builder_resize() {
503        let mut builder = BooleanBufferBuilder::new(20);
504        builder.append_n(4, true);
505        builder.append_n(7, false);
506        builder.append_n(2, true);
507        builder.resize(20);
508
509        assert_eq!(builder.len(), 20);
510        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);
511
512        builder.resize(5);
513        assert_eq!(builder.len(), 5);
514        assert_eq!(builder.as_slice(), &[0b00001111]);
515
516        builder.append_n(4, true);
517        assert_eq!(builder.len(), 9);
518        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
519    }
520
521    #[test]
522    fn test_truncate() {
523        let b = MutableBuffer::from_iter([true, true, true, true]);
524        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
525        builder.advance(2);
526        let finished = builder.finish();
527        assert_eq!(finished.values(), &[0b00000011]);
528
529        let mut builder = BooleanBufferBuilder::new(10);
530        builder.append_n(5, true);
531        builder.resize(3);
532        builder.advance(2);
533        let finished = builder.finish();
534        assert_eq!(finished.values(), &[0b00000111]);
535
536        let mut builder = BooleanBufferBuilder::new(10);
537        builder.append_n(16, true);
538        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
539        builder.truncate(20);
540        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
541        builder.truncate(14);
542        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
543        builder.append(false);
544        builder.append(true);
545        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
546        builder.append_packed_range(0..3, &[0xFF]);
547        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
548        builder.truncate(17);
549        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
550        builder.append_packed_range(0..2, &[2]);
551        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
552        builder.truncate(8);
553        assert_eq!(builder.as_slice(), &[0xFF]);
554        builder.resize(14);
555        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
556        builder.truncate(0);
557        assert_eq!(builder.as_slice(), &[]);
558    }
559
560    #[test]
561    fn test_boolean_builder_increases_buffer_len() {
562        // 00000010 01001000
563        let buf = Buffer::from([72_u8, 2_u8]);
564        let mut builder = BooleanBufferBuilder::new(8);
565
566        for i in 0..16 {
567            if i == 3 || i == 6 || i == 9 {
568                builder.append(true);
569            } else {
570                builder.append(false);
571            }
572        }
573        let buf2 = builder.finish();
574
575        assert_eq!(buf.len(), buf2.inner().len());
576        assert_eq!(buf.as_slice(), buf2.values());
577    }
578
579    #[test]
580    fn test_extend() {
581        let mut builder = BooleanBufferBuilder::new(0);
582        let bools = vec![true, false, true, true, false, true, true, true, false];
583        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
584        assert_eq!(builder.len(), 9);
585        let finished = builder.finish();
586        for (i, v) in bools.into_iter().enumerate() {
587            assert_eq!(finished.value(i), v);
588        }
589
590        // Test > 64 bits
591        let mut builder = BooleanBufferBuilder::new(0);
592        let bools: Vec<_> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();
593        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
594        assert_eq!(builder.len(), 100);
595        let finished = builder.finish();
596        for (i, v) in bools.into_iter().enumerate() {
597            assert_eq!(finished.value(i), v, "at index {}", i);
598        }
599    }
600
601    #[test]
602    fn test_extend_misaligned() {
603        // Test misaligned start
604        for offset in 1..65 {
605            let mut builder = BooleanBufferBuilder::new(0);
606            builder.append_n(offset, false);
607
608            let bools: Vec<_> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();
609            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
610            assert_eq!(builder.len(), offset + 100);
611
612            let finished = builder.finish();
613            for i in 0..offset {
614                assert!(!finished.value(i));
615            }
616            for (i, v) in bools.into_iter().enumerate() {
617                assert_eq!(finished.value(offset + i), v, "at index {}", offset + i);
618            }
619        }
620    }
621
622    #[test]
623    fn test_extend_misaligned_end() {
624        for len in 1..130 {
625            let mut builder = BooleanBufferBuilder::new(0);
626            let mut bools: Vec<_> = (0..len).map(|i| i % 2 == 0).collect();
627            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
628            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
629            let copy = bools.clone();
630            bools.extend(copy);
631            assert_eq!(builder.len(), 2 * len);
632
633            let finished = builder.finish();
634            for (i, &v) in bools.iter().enumerate() {
635                assert_eq!(finished.value(i), v, "at index {} for len {}", i, len);
636            }
637        }
638    }
639}