arrow_buffer/buffer/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_chunk_iterator::BitChunks;
19use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
20use crate::{
21    bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
22    BooleanBufferBuilder, Buffer, MutableBuffer,
23};
24
25use std::ops::{BitAnd, BitOr, BitXor, Not};
26
27/// A slice-able [`Buffer`] containing bit-packed booleans
28///
29/// `BooleanBuffer`s can be creating using [`BooleanBufferBuilder`]
30///
31/// # See Also
32///
33/// * [`NullBuffer`] for representing null values in Arrow arrays
34///
35/// [`NullBuffer`]: crate::NullBuffer
36#[derive(Debug, Clone, Eq)]
37pub struct BooleanBuffer {
38    buffer: Buffer,
39    offset: usize,
40    len: usize,
41}
42
43impl PartialEq for BooleanBuffer {
44    fn eq(&self, other: &Self) -> bool {
45        if self.len != other.len {
46            return false;
47        }
48
49        let lhs = self.bit_chunks().iter_padded();
50        let rhs = other.bit_chunks().iter_padded();
51        lhs.zip(rhs).all(|(a, b)| a == b)
52    }
53}
54
55impl BooleanBuffer {
56    /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
57    ///
58    /// # Panics
59    ///
60    /// This method will panic if `buffer` is not large enough
61    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
62        let total_len = offset.saturating_add(len);
63        let buffer_len = buffer.len();
64        let bit_len = buffer_len.saturating_mul(8);
65        assert!(
66            total_len <= bit_len,
67            "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})"
68        );
69        Self {
70            buffer,
71            offset,
72            len,
73        }
74    }
75
76    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
77    pub fn new_set(length: usize) -> Self {
78        let mut builder = BooleanBufferBuilder::new(length);
79        builder.append_n(length, true);
80        builder.finish()
81    }
82
83    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
84    pub fn new_unset(length: usize) -> Self {
85        let buffer = MutableBuffer::new_null(length).into_buffer();
86        Self {
87            buffer,
88            offset: 0,
89            len: length,
90        }
91    }
92
93    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
94    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
95        let buffer = MutableBuffer::collect_bool(len, f);
96        Self::new(buffer.into(), 0, len)
97    }
98
99    /// Returns the number of set bits in this buffer
100    pub fn count_set_bits(&self) -> usize {
101        self.buffer.count_set_bits_offset(self.offset, self.len)
102    }
103
104    /// Returns a `BitChunks` instance which can be used to iterate over
105    /// this buffer's bits in `u64` chunks
106    #[inline]
107    pub fn bit_chunks(&self) -> BitChunks<'_> {
108        BitChunks::new(self.values(), self.offset, self.len)
109    }
110
111    /// Returns the offset of this [`BooleanBuffer`] in bits
112    #[inline]
113    pub fn offset(&self) -> usize {
114        self.offset
115    }
116
117    /// Returns the length of this [`BooleanBuffer`] in bits
118    #[inline]
119    pub fn len(&self) -> usize {
120        self.len
121    }
122
123    /// Returns true if this [`BooleanBuffer`] is empty
124    #[inline]
125    pub fn is_empty(&self) -> bool {
126        self.len == 0
127    }
128
129    /// Free up unused memory.
130    pub fn shrink_to_fit(&mut self) {
131        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
132        self.buffer.shrink_to_fit();
133    }
134
135    /// Returns the boolean value at index `i`.
136    ///
137    /// # Panics
138    ///
139    /// Panics if `i >= self.len()`
140    #[inline]
141    pub fn value(&self, idx: usize) -> bool {
142        assert!(idx < self.len);
143        unsafe { self.value_unchecked(idx) }
144    }
145
146    /// Returns the boolean value at index `i`.
147    ///
148    /// # Safety
149    /// This doesn't check bounds, the caller must ensure that index < self.len()
150    #[inline]
151    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
152        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
153    }
154
155    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
156    #[inline]
157    pub fn values(&self) -> &[u8] {
158        &self.buffer
159    }
160
161    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
162    pub fn slice(&self, offset: usize, len: usize) -> Self {
163        assert!(
164            offset.saturating_add(len) <= self.len,
165            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
166        );
167        Self {
168            buffer: self.buffer.clone(),
169            offset: self.offset + offset,
170            len,
171        }
172    }
173
174    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
175    ///
176    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
177    pub fn sliced(&self) -> Buffer {
178        self.buffer.bit_slice(self.offset, self.len)
179    }
180
181    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
182    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
183    /// return false when the arrays are logically equal
184    pub fn ptr_eq(&self, other: &Self) -> bool {
185        self.buffer.as_ptr() == other.buffer.as_ptr()
186            && self.offset == other.offset
187            && self.len == other.len
188    }
189
190    /// Returns the inner [`Buffer`]
191    #[inline]
192    pub fn inner(&self) -> &Buffer {
193        &self.buffer
194    }
195
196    /// Returns the inner [`Buffer`], consuming self
197    pub fn into_inner(self) -> Buffer {
198        self.buffer
199    }
200
201    /// Returns an iterator over the bits in this [`BooleanBuffer`]
202    pub fn iter(&self) -> BitIterator<'_> {
203        self.into_iter()
204    }
205
206    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
207    pub fn set_indices(&self) -> BitIndexIterator<'_> {
208        BitIndexIterator::new(self.values(), self.offset, self.len)
209    }
210
211    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
212    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
213        BitIndexU32Iterator::new(self.values(), self.offset, self.len)
214    }
215
216    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
217    pub fn set_slices(&self) -> BitSliceIterator<'_> {
218        BitSliceIterator::new(self.values(), self.offset, self.len)
219    }
220}
221
222impl Not for &BooleanBuffer {
223    type Output = BooleanBuffer;
224
225    fn not(self) -> Self::Output {
226        BooleanBuffer {
227            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
228            offset: 0,
229            len: self.len,
230        }
231    }
232}
233
234impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
235    type Output = BooleanBuffer;
236
237    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
238        assert_eq!(self.len, rhs.len);
239        BooleanBuffer {
240            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
241            offset: 0,
242            len: self.len,
243        }
244    }
245}
246
247impl BitOr<&BooleanBuffer> for &BooleanBuffer {
248    type Output = BooleanBuffer;
249
250    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
251        assert_eq!(self.len, rhs.len);
252        BooleanBuffer {
253            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
254            offset: 0,
255            len: self.len,
256        }
257    }
258}
259
260impl BitXor<&BooleanBuffer> for &BooleanBuffer {
261    type Output = BooleanBuffer;
262
263    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
264        assert_eq!(self.len, rhs.len);
265        BooleanBuffer {
266            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
267            offset: 0,
268            len: self.len,
269        }
270    }
271}
272
273impl<'a> IntoIterator for &'a BooleanBuffer {
274    type Item = bool;
275    type IntoIter = BitIterator<'a>;
276
277    fn into_iter(self) -> Self::IntoIter {
278        BitIterator::new(self.values(), self.offset, self.len)
279    }
280}
281
282impl From<&[bool]> for BooleanBuffer {
283    fn from(value: &[bool]) -> Self {
284        let mut builder = BooleanBufferBuilder::new(value.len());
285        builder.append_slice(value);
286        builder.finish()
287    }
288}
289
290impl From<Vec<bool>> for BooleanBuffer {
291    fn from(value: Vec<bool>) -> Self {
292        value.as_slice().into()
293    }
294}
295
296impl FromIterator<bool> for BooleanBuffer {
297    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
298        let iter = iter.into_iter();
299        let (hint, _) = iter.size_hint();
300        let mut builder = BooleanBufferBuilder::new(hint);
301        iter.for_each(|b| builder.append(b));
302        builder.finish()
303    }
304}
305
306#[cfg(test)]
307mod tests {
308    use super::*;
309
310    #[test]
311    fn test_boolean_new() {
312        let bytes = &[0, 1, 2, 3, 4];
313        let buf = Buffer::from(bytes);
314        let offset = 0;
315        let len = 24;
316
317        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
318        assert_eq!(bytes, boolean_buf.values());
319        assert_eq!(offset, boolean_buf.offset());
320        assert_eq!(len, boolean_buf.len());
321
322        assert_eq!(2, boolean_buf.count_set_bits());
323        assert_eq!(&buf, boolean_buf.inner());
324        assert_eq!(buf, boolean_buf.clone().into_inner());
325
326        assert!(!boolean_buf.is_empty())
327    }
328
329    #[test]
330    fn test_boolean_data_equality() {
331        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
332        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
333        assert_eq!(boolean_buf1, boolean_buf2);
334
335        // slice with same offset and same length should still preserve equality
336        let boolean_buf3 = boolean_buf1.slice(8, 16);
337        assert_ne!(boolean_buf1, boolean_buf3);
338        let boolean_buf4 = boolean_buf1.slice(0, 32);
339        assert_eq!(boolean_buf1, boolean_buf4);
340
341        // unequal because of different elements
342        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
343        assert_ne!(boolean_buf1, boolean_buf2);
344
345        // unequal because of different length
346        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
347        assert_ne!(boolean_buf1, boolean_buf2);
348
349        // ptr_eq
350        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
351        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
352        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
353    }
354
355    #[test]
356    fn test_boolean_slice() {
357        let bytes = &[0, 3, 2, 6, 2];
358        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
359        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
360
361        let boolean_slice1 = boolean_buf1.slice(16, 16);
362        let boolean_slice2 = boolean_buf2.slice(0, 16);
363        assert_eq!(boolean_slice1.values(), boolean_slice2.values());
364
365        assert_eq!(bytes, boolean_slice1.values());
366        assert_eq!(16, boolean_slice1.offset);
367        assert_eq!(16, boolean_slice1.len);
368
369        assert_eq!(bytes, boolean_slice2.values());
370        assert_eq!(0, boolean_slice2.offset);
371        assert_eq!(16, boolean_slice2.len);
372    }
373
374    #[test]
375    fn test_boolean_bitand() {
376        let offset = 0;
377        let len = 40;
378
379        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
380        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
381
382        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
383        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
384
385        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
386        assert_eq!(boolean_buf1 & boolean_buf2, expected);
387    }
388
389    #[test]
390    fn test_boolean_bitor() {
391        let offset = 0;
392        let len = 40;
393
394        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
395        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
396
397        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
398        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
399
400        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
401        assert_eq!(boolean_buf1 | boolean_buf2, expected);
402    }
403
404    #[test]
405    fn test_boolean_bitxor() {
406        let offset = 0;
407        let len = 40;
408
409        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
410        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
411
412        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
413        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
414
415        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
416        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
417    }
418
419    #[test]
420    fn test_boolean_not() {
421        let offset = 0;
422        let len = 40;
423
424        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
425        let boolean_buf = &BooleanBuffer::new(buf, offset, len);
426
427        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
428        assert_eq!(!boolean_buf, expected);
429    }
430
431    #[test]
432    fn test_boolean_from_slice_bool() {
433        let v = [true, false, false];
434        let buf = BooleanBuffer::from(&v[..]);
435        assert_eq!(buf.offset(), 0);
436        assert_eq!(buf.len(), 3);
437        assert_eq!(buf.values().len(), 1);
438        assert!(buf.value(0));
439    }
440}