Skip to main content

arrow_buffer/builder/
null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
19
/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls).
///
/// # See also
/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder.
/// * [`Self::allocated_size`] for the current memory allocated by the builder.
///
/// # Performance
///
/// This builder only materializes the buffer when null values (`false`) are
/// appended. If you only append non-null (`true`) values to the builder, no
/// buffer is allocated and [`build`](#method.build) or
/// [`finish`](#method.finish) return `None`.
///
/// This optimization is **very** important for the performance as it avoids
/// allocating memory for the null buffer when there are no nulls.
///
/// # Example
/// ```
/// # use arrow_buffer::NullBufferBuilder;
/// let mut builder = NullBufferBuilder::new(8);
/// builder.append_n_non_nulls(8);
/// // If only non-null values are appended, the null buffer is not created
/// let buffer = builder.finish();
/// assert!(buffer.is_none());
/// // however, if a null value is appended, the null buffer is created
/// let mut builder = NullBufferBuilder::new(8);
/// builder.append_n_non_nulls(7);
/// builder.append_null();
/// let buffer = builder.finish().unwrap();
/// assert_eq!(buffer.len(), 8);
/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
/// ```
#[derive(Debug)]
pub struct NullBufferBuilder {
    /// The bitmap builder to store the null buffer:
    /// * `Some` once the bitmap has been "materialized" (for example because
    ///   a null was appended)
    /// * `None` if the bitmap has not been materialized yet.
    bitmap_builder: Option<BooleanBufferBuilder>,
    /// Number of (non-null) entries tracked before materializing.
    ///
    /// If `bitmap_builder` is `Some`, this value is not used; the length is
    /// then read from the bitmap builder itself.
    len: usize,
    /// Initial capacity (in bits) of the `bitmap_builder`, when it is materialized.
    capacity: usize,
}
65
66impl NullBufferBuilder {
67    /// Creates a new empty builder.
68    ///
69    /// Note that this method does not allocate any memory, regardless of the
70    /// `capacity` parameter. If an allocation is required, `capacity` is the
71    /// size in bits (not bytes) that will be allocated at minimum.
72    pub fn new(capacity: usize) -> Self {
73        Self {
74            bitmap_builder: None,
75            len: 0,
76            capacity,
77        }
78    }
79
80    /// Creates a new builder with given length.
81    pub fn new_with_len(len: usize) -> Self {
82        Self {
83            bitmap_builder: None,
84            len,
85            capacity: len,
86        }
87    }
88
89    /// Creates a new builder from a `MutableBuffer`.
90    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
91        let capacity = buffer.len() * 8;
92        assert!(len <= capacity);
93
94        let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
95        Self {
96            bitmap_builder,
97            len,
98            capacity,
99        }
100    }
101
102    /// Appends `n` `true`s into the builder
103    /// to indicate that these `n` items are not nulls.
104    #[inline]
105    pub fn append_n_non_nulls(&mut self, n: usize) {
106        if let Some(buf) = self.bitmap_builder.as_mut() {
107            buf.append_n(n, true)
108        } else {
109            self.len += n;
110        }
111    }
112
113    /// Appends a `true` into the builder
114    /// to indicate that this item is not null.
115    #[inline]
116    pub fn append_non_null(&mut self) {
117        if let Some(buf) = self.bitmap_builder.as_mut() {
118            buf.append(true)
119        } else {
120            self.len += 1;
121        }
122    }
123
124    /// Appends `n` `false`s into the builder
125    /// to indicate that these `n` items are nulls.
126    #[inline]
127    pub fn append_n_nulls(&mut self, n: usize) {
128        self.materialize_if_needed();
129        self.bitmap_builder.as_mut().unwrap().append_n(n, false);
130    }
131
132    /// Appends a `false` into the builder
133    /// to indicate that this item is null.
134    #[inline]
135    pub fn append_null(&mut self) {
136        self.materialize_if_needed();
137        self.bitmap_builder.as_mut().unwrap().append(false);
138    }
139
140    /// Appends a boolean value into the builder.
141    #[inline]
142    pub fn append(&mut self, not_null: bool) {
143        if not_null {
144            self.append_non_null()
145        } else {
146            self.append_null()
147        }
148    }
149
150    /// Sets a bit in the builder at `index`
151    #[inline]
152    pub fn set_bit(&mut self, index: usize, v: bool) {
153        self.materialize_if_needed();
154        self.bitmap_builder.as_mut().unwrap().set_bit(index, v);
155    }
156
157    /// Gets a bit in the buffer at `index`
158    #[inline]
159    pub fn is_valid(&self, index: usize) -> bool {
160        if let Some(ref buf) = self.bitmap_builder {
161            buf.get_bit(index)
162        } else {
163            true
164        }
165    }
166
167    /// Truncates the builder to the given length
168    ///
169    /// If `len` is greater than the buffer's current length, this has no effect
170    #[inline]
171    pub fn truncate(&mut self, len: usize) {
172        if let Some(buf) = self.bitmap_builder.as_mut() {
173            buf.truncate(len);
174        } else if len <= self.len {
175            self.len = len
176        }
177    }
178
179    /// Appends a boolean slice into the builder
180    /// to indicate the validations of these items.
181    pub fn append_slice(&mut self, slice: &[bool]) {
182        if slice.iter().any(|v| !v) {
183            self.materialize_if_needed()
184        }
185        if let Some(buf) = self.bitmap_builder.as_mut() {
186            buf.append_slice(slice)
187        } else {
188            self.len += slice.len();
189        }
190    }
191
192    /// Append [`NullBuffer`] to this [`NullBufferBuilder`]
193    ///
194    /// This is useful when you want to concatenate two null buffers.
195    pub fn append_buffer(&mut self, buffer: &NullBuffer) {
196        if buffer.null_count() > 0 {
197            self.materialize_if_needed();
198        }
199        if let Some(buf) = self.bitmap_builder.as_mut() {
200            buf.append_buffer(buffer.inner())
201        } else {
202            self.len += buffer.len();
203        }
204    }
205
206    /// Builds the [`NullBuffer`] and resets the builder.
207    ///
208    /// Returns `None` if the builder only contains `true`s. Use [`Self::build`]
209    /// when you don't need to reuse this builder.
210    pub fn finish(&mut self) -> Option<NullBuffer> {
211        self.len = 0;
212        Some(NullBuffer::new(self.bitmap_builder.take()?.build()))
213    }
214
215    /// Builds the [`NullBuffer`] without resetting the builder.
216    ///
217    /// This consumes the builder. Use [`Self::finish`] to reuse it.
218    pub fn build(self) -> Option<NullBuffer> {
219        self.bitmap_builder.map(NullBuffer::from)
220    }
221
222    /// Builds the [NullBuffer] without resetting the builder.
223    pub fn finish_cloned(&self) -> Option<NullBuffer> {
224        let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
225        Some(NullBuffer::new(buffer))
226    }
227
228    /// Returns the inner bitmap builder as slice
229    pub fn as_slice(&self) -> Option<&[u8]> {
230        Some(self.bitmap_builder.as_ref()?.as_slice())
231    }
232
233    fn materialize_if_needed(&mut self) {
234        if self.bitmap_builder.is_none() {
235            self.materialize()
236        }
237    }
238
239    #[cold]
240    fn materialize(&mut self) {
241        if self.bitmap_builder.is_none() {
242            let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
243            b.append_n(self.len, true);
244            self.bitmap_builder = Some(b);
245        }
246    }
247
248    /// Return a mutable reference to the inner bitmap slice.
249    pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
250        self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
251    }
252
253    /// Return the allocated size of this builder, in bytes, useful for memory accounting.
254    pub fn allocated_size(&self) -> usize {
255        self.bitmap_builder
256            .as_ref()
257            .map(|b| b.capacity() / 8)
258            .unwrap_or(0)
259    }
260
261    /// Return the number of bits in the buffer.
262    pub fn len(&self) -> usize {
263        self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
264    }
265
266    /// Check if the builder is empty.
267    pub fn is_empty(&self) -> bool {
268        self.len() == 0
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed nulls and non-nulls produce the expected bitmap and allocation.
    #[test]
    fn test_null_buffer_builder() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_non_null();
        builder.append_n_nulls(2);
        builder.append_n_non_nulls(2);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b110010_u8], buf.validity());
    }

    /// Only nulls: every validity bit is cleared.
    #[test]
    fn test_null_buffer_builder_all_nulls() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_n_nulls(2);
        builder.append_slice(&[false, false, false]);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b0_u8], buf.validity());
    }

    /// Only non-nulls: nothing is allocated and `finish` yields `None`.
    #[test]
    fn test_null_buffer_builder_no_null() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_non_null();
        builder.append_n_non_nulls(2);
        builder.append_slice(&[true, true, true]);
        assert_eq!(6, builder.len());
        assert_eq!(0, builder.allocated_size());

        let buf = builder.finish();
        assert!(buf.is_none());
    }

    /// `finish` resets the builder so it can be reused.
    #[test]
    fn test_null_buffer_builder_reset() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_slice(&[true, false, true]);
        builder.finish();
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, true]);
        assert!(builder.finish().is_none());
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, false, true]);

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b1011_u8], buf.validity());
    }

    /// `is_valid` works both before and after materialization.
    #[test]
    fn test_null_buffer_builder_is_valid() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_n_non_nulls(6);
        assert!(builder.is_valid(0));

        builder.append_null();
        assert!(!builder.is_valid(6));

        builder.append_non_null();
        assert!(builder.is_valid(7));
    }

    /// Truncating an unmaterialized builder, then materializing it.
    #[test]
    fn test_null_buffer_builder_truncate() {
        let mut builder = NullBufferBuilder::new(10);
        builder.append_n_non_nulls(16);
        assert_eq!(builder.as_slice(), None);
        builder.truncate(20);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 16);
        assert_eq!(builder.allocated_size(), 0);
        builder.truncate(14);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 14);
        builder.append_null();
        builder.append_non_null();
        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
        assert_eq!(builder.allocated_size(), 64);
    }

    /// Truncation must work on the length counter alone, without ever
    /// materializing the bitmap.
    #[test]
    fn test_null_buffer_builder_truncate_never_materialized() {
        let mut builder = NullBufferBuilder::new(0);
        assert_eq!(builder.len(), 0);
        builder.append_n_non_nulls(2); // doesn't materialize
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 2);
        builder.truncate(1);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 1);
    }

    /// Truncation of a builder whose bitmap was materialized by nulls.
    #[test]
    fn test_null_buffer_builder_truncate_materialized() {
        let mut builder = NullBufferBuilder::new(0);
        assert_eq!(builder.len(), 0);
        builder.append_n_nulls(2); // materializes the bitmap
        assert!(builder.as_slice().is_some());
        assert_eq!(builder.len(), 2);
        builder.truncate(1);
        assert_eq!(builder.len(), 1);
    }

    /// Appending an all-valid buffer followed by one containing a null.
    #[test]
    fn test_append_buffers() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer1 = NullBuffer::from(&[true, true]);
        let buffer2 = NullBuffer::from(&[true, true, false]);

        builder.append_buffer(&buffer1);
        builder.append_buffer(&buffer2);

        assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]);
    }

    /// Appended buffers need not end on a byte boundary.
    #[test]
    fn test_append_buffers_with_unaligned_length() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true, true, false, true, false]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]);

        let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]);
    }

    /// Appending an empty buffer leaves the bitmap unchanged.
    #[test]
    fn test_append_empty_buffer() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true, true, false, true]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);

        let buffer = NullBuffer::from(&[]);
        builder.append_buffer(&buffer);

        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
    }

    /// All-valid buffers must not trigger materialization.
    #[test]
    fn test_should_not_materialize_when_appending_all_valid_buffers() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true; 10]);
        builder.append_buffer(&buffer);

        let buffer = NullBuffer::from(&[true; 2]);
        builder.append_buffer(&buffer);

        assert_eq!(builder.finish(), None);
    }
}