arrow_buffer/builder/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_util::apply_bitwise_binary_op;
19use crate::{BooleanBuffer, Buffer, MutableBuffer, bit_util};
20use std::ops::Range;
21
22/// Builder for [`BooleanBuffer`]
23///
24/// # See Also
25///
26/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
27///
28/// [`NullBuffer`]: crate::NullBuffer
29#[derive(Debug)]
30pub struct BooleanBufferBuilder {
31    buffer: MutableBuffer,
32    len: usize,
33}
34
35impl BooleanBufferBuilder {
36    /// Creates a new `BooleanBufferBuilder` with sufficient space for
37    /// `capacity` bits (not bytes).
38    ///
39    /// The capacity is rounded up to the nearest multiple of 8 for the
40    /// allocation.
41    #[inline]
42    pub fn new(capacity: usize) -> Self {
43        let byte_capacity = bit_util::ceil(capacity, 8);
44        let buffer = MutableBuffer::new(byte_capacity);
45        Self { buffer, len: 0 }
46    }
47
48    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
49    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
50        assert!(len <= buffer.len() * 8);
51        let mut s = Self {
52            len: buffer.len() * 8,
53            buffer,
54        };
55        s.truncate(len);
56        s
57    }
58
59    /// Returns the length of the buffer
60    #[inline]
61    pub fn len(&self) -> usize {
62        self.len
63    }
64
65    /// Sets a bit in the buffer at `index`
66    #[inline]
67    pub fn set_bit(&mut self, index: usize, v: bool) {
68        if v {
69            bit_util::set_bit(self.buffer.as_mut(), index);
70        } else {
71            bit_util::unset_bit(self.buffer.as_mut(), index);
72        }
73    }
74
75    /// Gets a bit in the buffer at `index`
76    #[inline]
77    pub fn get_bit(&self, index: usize) -> bool {
78        bit_util::get_bit(self.buffer.as_slice(), index)
79    }
80
81    /// Returns true if empty
82    #[inline]
83    pub fn is_empty(&self) -> bool {
84        self.len == 0
85    }
86
87    /// Returns the capacity of the buffer, in bits (not bytes)
88    ///
89    /// Note this
90    ///
91    /// # Example
92    /// ```
93    /// # use arrow_buffer::builder::BooleanBufferBuilder;
94    /// // empty requires 0 bytes
95    /// let b = BooleanBufferBuilder::new(0);
96    /// assert_eq!(0, b.capacity());
97    /// // Creating space for 1 bit results in 64 bytes (space for 512 bits)
98    /// // (64 is the minimum allocation size for 64 bit architectures)
99    /// let mut b = BooleanBufferBuilder::new(1);
100    /// assert_eq!(512, b.capacity());
101    /// // 1000 bits requires 128 bytes (space for 1024 bits)
102    /// b.append_n(1000, true);
103    /// assert_eq!(1024, b.capacity());
104    /// ```
105    #[inline]
106    pub fn capacity(&self) -> usize {
107        self.buffer.capacity() * 8
108    }
109
110    /// Advances the buffer by `additional` bits
111    #[inline]
112    pub fn advance(&mut self, additional: usize) {
113        let new_len = self.len + additional;
114        let new_len_bytes = bit_util::ceil(new_len, 8);
115        if new_len_bytes > self.buffer.len() {
116            self.buffer.resize(new_len_bytes, 0);
117        }
118        self.len = new_len;
119    }
120
121    /// Truncates the builder to the given length
122    ///
123    /// If `len` is greater than the buffer's current length, this has no effect
124    #[inline]
125    pub fn truncate(&mut self, len: usize) {
126        if len > self.len {
127            return;
128        }
129
130        let new_len_bytes = bit_util::ceil(len, 8);
131        self.buffer.truncate(new_len_bytes);
132        self.len = len;
133
134        let remainder = self.len % 8;
135        if remainder != 0 {
136            let mask = (1_u8 << remainder).wrapping_sub(1);
137            *self.buffer.as_mut().last_mut().unwrap() &= mask;
138        }
139    }
140
141    /// Reserve space to at least `additional` new bits.
142    /// Capacity will be `>= self.len() + additional`.
143    #[inline]
144    pub fn reserve(&mut self, additional: usize) {
145        let capacity = self.len + additional;
146        if capacity > self.capacity() {
147            // convert differential to bytes
148            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
149            self.buffer.reserve(additional);
150        }
151    }
152
153    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
154    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
155    #[inline]
156    pub fn resize(&mut self, len: usize) {
157        match len.checked_sub(self.len) {
158            Some(delta) => self.advance(delta),
159            None => self.truncate(len),
160        }
161    }
162
163    /// Appends a boolean `v` into the buffer
164    #[inline]
165    pub fn append(&mut self, v: bool) {
166        self.advance(1);
167        if v {
168            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
169        }
170    }
171
172    /// Appends n `additional` bits of value `v` into the buffer
173    #[inline]
174    pub fn append_n(&mut self, additional: usize, v: bool) {
175        match v {
176            true => {
177                let new_len = self.len + additional;
178                let new_len_bytes = bit_util::ceil(new_len, 8);
179                let cur_remainder = self.len % 8;
180                let new_remainder = new_len % 8;
181
182                if cur_remainder != 0 {
183                    // Pad last byte with 1s
184                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
185                }
186                self.buffer.resize(new_len_bytes, 0xFF);
187                if new_remainder != 0 {
188                    // Clear remaining bits
189                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
190                }
191                self.len = new_len;
192            }
193            false => self.advance(additional),
194        }
195    }
196
197    /// Appends a slice of booleans into the buffer
198    #[inline]
199    pub fn append_slice(&mut self, slice: &[bool]) {
200        let additional = slice.len();
201        self.advance(additional);
202
203        let offset = self.len() - additional;
204        for (i, v) in slice.iter().enumerate() {
205            if *v {
206                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
207            }
208        }
209    }
210
211    /// Append `range` bits from `to_set`
212    ///
213    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
214    ///
215    /// # Panics
216    ///
217    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
218    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
219        let offset_write = self.len;
220        let len = range.end - range.start;
221        // allocate new bits as 0
222        self.advance(len);
223        // copy bits from to_set into self.buffer a word at a time
224        apply_bitwise_binary_op(
225            self.buffer.as_slice_mut(),
226            offset_write,
227            to_set,
228            range.start,
229            len,
230            |_a, b| b, // copy bits from to_set
231        );
232    }
233
234    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
235    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
236        let range = buffer.offset()..buffer.offset() + buffer.len();
237        self.append_packed_range(range, buffer.values())
238    }
239
240    /// Returns the packed bits
241    pub fn as_slice(&self) -> &[u8] {
242        self.buffer.as_slice()
243    }
244
245    /// Returns the packed bits
246    pub fn as_slice_mut(&mut self) -> &mut [u8] {
247        self.buffer.as_slice_mut()
248    }
249
250    /// Creates a [`BooleanBuffer`]
251    #[inline]
252    pub fn finish(&mut self) -> BooleanBuffer {
253        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
254        let len = std::mem::replace(&mut self.len, 0);
255        BooleanBuffer::new(buf.into(), 0, len)
256    }
257
258    /// Builds the [BooleanBuffer] without resetting the builder.
259    pub fn finish_cloned(&self) -> BooleanBuffer {
260        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
261    }
262}
263
264impl From<BooleanBufferBuilder> for Buffer {
265    #[inline]
266    fn from(builder: BooleanBufferBuilder) -> Self {
267        builder.buffer.into()
268    }
269}
270
271impl From<BooleanBufferBuilder> for BooleanBuffer {
272    #[inline]
273    fn from(builder: BooleanBufferBuilder) -> Self {
274        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
275    }
276}
277
278#[cfg(test)]
279mod tests {
280    use super::*;
281
282    #[test]
283    fn test_boolean_buffer_builder_write_bytes() {
284        let mut b = BooleanBufferBuilder::new(4);
285        b.append(false);
286        b.append(true);
287        b.append(false);
288        b.append(true);
289        assert_eq!(4, b.len());
290        assert_eq!(512, b.capacity());
291        let buffer = b.finish();
292        assert_eq!(4, buffer.len());
293
294        // Overallocate capacity
295        let mut b = BooleanBufferBuilder::new(8);
296        b.append_slice(&[false, true, false, true]);
297        assert_eq!(4, b.len());
298        assert_eq!(512, b.capacity());
299        let buffer = b.finish();
300        assert_eq!(4, buffer.len());
301    }
302
303    #[test]
304    fn test_boolean_buffer_builder_unset_first_bit() {
305        let mut buffer = BooleanBufferBuilder::new(4);
306        buffer.append(true);
307        buffer.append(true);
308        buffer.append(false);
309        buffer.append(true);
310        buffer.set_bit(0, false);
311        assert_eq!(buffer.len(), 4);
312        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
313    }
314
315    #[test]
316    fn test_boolean_buffer_builder_unset_last_bit() {
317        let mut buffer = BooleanBufferBuilder::new(4);
318        buffer.append(true);
319        buffer.append(true);
320        buffer.append(false);
321        buffer.append(true);
322        buffer.set_bit(3, false);
323        assert_eq!(buffer.len(), 4);
324        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
325    }
326
327    #[test]
328    fn test_boolean_buffer_builder_unset_an_inner_bit() {
329        let mut buffer = BooleanBufferBuilder::new(5);
330        buffer.append(true);
331        buffer.append(true);
332        buffer.append(false);
333        buffer.append(true);
334        buffer.set_bit(1, false);
335        assert_eq!(buffer.len(), 4);
336        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
337    }
338
339    #[test]
340    fn test_boolean_buffer_builder_unset_several_bits() {
341        let mut buffer = BooleanBufferBuilder::new(5);
342        buffer.append(true);
343        buffer.append(true);
344        buffer.append(true);
345        buffer.append(false);
346        buffer.append(true);
347        buffer.set_bit(1, false);
348        buffer.set_bit(2, false);
349        assert_eq!(buffer.len(), 5);
350        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
351    }
352
353    #[test]
354    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
355        let mut buffer = BooleanBufferBuilder::new(16);
356        buffer.append_n(10, true);
357        buffer.set_bit(0, false);
358        buffer.set_bit(3, false);
359        buffer.set_bit(9, false);
360        assert_eq!(buffer.len(), 10);
361        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
362    }
363
364    #[test]
365    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
366        let mut buffer = BooleanBufferBuilder::new(16);
367        buffer.append_n(5, true);
368        buffer.append_n(5, false);
369        buffer.append_n(5, true);
370        buffer.set_bit(0, false);
371        buffer.set_bit(3, false);
372        buffer.set_bit(9, false);
373        buffer.set_bit(6, true);
374        buffer.set_bit(14, true);
375        buffer.set_bit(13, false);
376        assert_eq!(buffer.len(), 15);
377        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
378    }
379
380    #[test]
381    fn test_bool_buffer_builder_get_first_bit() {
382        let mut buffer = BooleanBufferBuilder::new(16);
383        buffer.append_n(8, true);
384        buffer.append_n(8, false);
385        assert!(buffer.get_bit(0));
386    }
387
388    #[test]
389    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
390        let buffer = {
391            let mut buffer = BooleanBufferBuilder::new(16);
392            buffer.append_n(8, true);
393            buffer
394        };
395
396        assert!(buffer.get_bit(0));
397    }
398
399    #[test]
400    fn test_bool_buffer_builder_get_last_bit() {
401        let mut buffer = BooleanBufferBuilder::new(16);
402        buffer.append_n(8, true);
403        buffer.append_n(8, false);
404        assert!(!buffer.get_bit(15));
405    }
406
407    #[test]
408    fn test_bool_buffer_builder_get_an_inner_bit() {
409        let mut buffer = BooleanBufferBuilder::new(16);
410        buffer.append_n(4, false);
411        buffer.append_n(8, true);
412        buffer.append_n(4, false);
413        assert!(buffer.get_bit(11));
414    }
415
416    #[test]
417    fn test_bool_buffer_fuzz() {
418        use rand::prelude::*;
419
420        let mut buffer = BooleanBufferBuilder::new(12);
421        let mut all_bools = vec![];
422        let mut rng = rand::rng();
423
424        let src_len = 32;
425        let (src, compacted_src) = {
426            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
427                .take(src_len)
428                .collect();
429
430            let mut compacted_src = BooleanBufferBuilder::new(src_len);
431            compacted_src.append_slice(&src);
432            (src, compacted_src.finish())
433        };
434
435        for _ in 0..100 {
436            let a = rng.next_u32() as usize % src_len;
437            let b = rng.next_u32() as usize % src_len;
438
439            let start = a.min(b);
440            let end = a.max(b);
441
442            buffer.append_packed_range(start..end, compacted_src.values());
443            all_bools.extend_from_slice(&src[start..end]);
444        }
445
446        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
447        compacted.append_slice(&all_bools);
448
449        assert_eq!(buffer.finish(), compacted.finish())
450    }
451
452    #[test]
453    fn test_boolean_array_builder_resize() {
454        let mut builder = BooleanBufferBuilder::new(20);
455        builder.append_n(4, true);
456        builder.append_n(7, false);
457        builder.append_n(2, true);
458        builder.resize(20);
459
460        assert_eq!(builder.len(), 20);
461        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);
462
463        builder.resize(5);
464        assert_eq!(builder.len(), 5);
465        assert_eq!(builder.as_slice(), &[0b00001111]);
466
467        builder.append_n(4, true);
468        assert_eq!(builder.len(), 9);
469        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
470    }
471
472    #[test]
473    fn test_truncate() {
474        let b = MutableBuffer::from_iter([true, true, true, true]);
475        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
476        builder.advance(2);
477        let finished = builder.finish();
478        assert_eq!(finished.values(), &[0b00000011]);
479
480        let mut builder = BooleanBufferBuilder::new(10);
481        builder.append_n(5, true);
482        builder.resize(3);
483        builder.advance(2);
484        let finished = builder.finish();
485        assert_eq!(finished.values(), &[0b00000111]);
486
487        let mut builder = BooleanBufferBuilder::new(10);
488        builder.append_n(16, true);
489        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
490        builder.truncate(20);
491        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
492        builder.truncate(14);
493        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
494        builder.append(false);
495        builder.append(true);
496        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
497        builder.append_packed_range(0..3, &[0xFF]);
498        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
499        builder.truncate(17);
500        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
501        builder.append_packed_range(0..2, &[2]);
502        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
503        builder.truncate(8);
504        assert_eq!(builder.as_slice(), &[0xFF]);
505        builder.resize(14);
506        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
507        builder.truncate(0);
508        assert_eq!(builder.as_slice(), &[]);
509    }
510
511    #[test]
512    fn test_boolean_builder_increases_buffer_len() {
513        // 00000010 01001000
514        let buf = Buffer::from([72_u8, 2_u8]);
515        let mut builder = BooleanBufferBuilder::new(8);
516
517        for i in 0..16 {
518            if i == 3 || i == 6 || i == 9 {
519                builder.append(true);
520            } else {
521                builder.append(false);
522            }
523        }
524        let buf2 = builder.finish();
525
526        assert_eq!(buf.len(), buf2.inner().len());
527        assert_eq!(buf.as_slice(), buf2.values());
528    }
529}