arrow_buffer/builder/
null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
19
20/// Builder for creating [`NullBuffer`]
21///
22/// # Performance
23///
24/// This builder only materializes the buffer when we append `false`.
25/// If you only append `true`s to the builder, what you get will be
26/// `None` when calling [`finish`](#method.finish).
27///
28/// This optimization is **very** important for the performance as it avoids
29/// allocating memory for the null buffer when there are no nulls.
30///
31/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
32///
33/// # Example
34/// ```
35/// # use arrow_buffer::NullBufferBuilder;
36/// let mut builder = NullBufferBuilder::new(8);
37/// builder.append_n_non_nulls(8);
38/// // If no non null values are appended, the null buffer is not created
39/// let buffer = builder.finish();
40/// assert!(buffer.is_none());
41/// // however, if a null value is appended, the null buffer is created
42/// let mut builder = NullBufferBuilder::new(8);
43/// builder.append_n_non_nulls(7);
44/// builder.append_null();
45/// let buffer = builder.finish().unwrap();
46/// assert_eq!(buffer.len(), 8);
47/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
48/// ```
49#[derive(Debug)]
50pub struct NullBufferBuilder {
51    /// The bitmap builder to store the null buffer:
52    /// * `Some` if any nulls have been appended ("materialized")
53    /// * `None` if no nulls have been appended.
54    bitmap_builder: Option<BooleanBufferBuilder>,
55    /// Length of the buffer before materializing.
56    ///
57    /// if `bitmap_buffer` buffer is `Some`, this value is not used.
58    len: usize,
59    /// Initial capacity of the `bitmap_builder`, when it is materialized.
60    capacity: usize,
61}
62
63impl NullBufferBuilder {
64    /// Creates a new empty builder.
65    ///
66    /// Note that this method does not allocate any memory, regardless of the
67    /// `capacity` parameter. If an allocation is required, `capacity` is the
68    /// size in bits (not bytes) that will be allocated at minimum.
69    pub fn new(capacity: usize) -> Self {
70        Self {
71            bitmap_builder: None,
72            len: 0,
73            capacity,
74        }
75    }
76
77    /// Creates a new builder with given length.
78    pub fn new_with_len(len: usize) -> Self {
79        Self {
80            bitmap_builder: None,
81            len,
82            capacity: len,
83        }
84    }
85
86    /// Creates a new builder from a `MutableBuffer`.
87    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
88        let capacity = buffer.len() * 8;
89        assert!(len <= capacity);
90
91        let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
92        Self {
93            bitmap_builder,
94            len,
95            capacity,
96        }
97    }
98
99    /// Appends `n` `true`s into the builder
100    /// to indicate that these `n` items are not nulls.
101    #[inline]
102    pub fn append_n_non_nulls(&mut self, n: usize) {
103        if let Some(buf) = self.bitmap_builder.as_mut() {
104            buf.append_n(n, true)
105        } else {
106            self.len += n;
107        }
108    }
109
110    /// Appends a `true` into the builder
111    /// to indicate that this item is not null.
112    #[inline]
113    pub fn append_non_null(&mut self) {
114        if let Some(buf) = self.bitmap_builder.as_mut() {
115            buf.append(true)
116        } else {
117            self.len += 1;
118        }
119    }
120
121    /// Appends `n` `false`s into the builder
122    /// to indicate that these `n` items are nulls.
123    #[inline]
124    pub fn append_n_nulls(&mut self, n: usize) {
125        self.materialize_if_needed();
126        self.bitmap_builder.as_mut().unwrap().append_n(n, false);
127    }
128
129    /// Appends a `false` into the builder
130    /// to indicate that this item is null.
131    #[inline]
132    pub fn append_null(&mut self) {
133        self.materialize_if_needed();
134        self.bitmap_builder.as_mut().unwrap().append(false);
135    }
136
137    /// Appends a boolean value into the builder.
138    #[inline]
139    pub fn append(&mut self, not_null: bool) {
140        if not_null {
141            self.append_non_null()
142        } else {
143            self.append_null()
144        }
145    }
146
147    /// Gets a bit in the buffer at `index`
148    #[inline]
149    pub fn is_valid(&self, index: usize) -> bool {
150        if let Some(ref buf) = self.bitmap_builder {
151            buf.get_bit(index)
152        } else {
153            true
154        }
155    }
156
157    /// Truncates the builder to the given length
158    ///
159    /// If `len` is greater than the buffer's current length, this has no effect
160    #[inline]
161    pub fn truncate(&mut self, len: usize) {
162        if let Some(buf) = self.bitmap_builder.as_mut() {
163            buf.truncate(len);
164        } else if len <= self.len {
165            self.len = len
166        }
167    }
168
169    /// Appends a boolean slice into the builder
170    /// to indicate the validations of these items.
171    pub fn append_slice(&mut self, slice: &[bool]) {
172        if slice.iter().any(|v| !v) {
173            self.materialize_if_needed()
174        }
175        if let Some(buf) = self.bitmap_builder.as_mut() {
176            buf.append_slice(slice)
177        } else {
178            self.len += slice.len();
179        }
180    }
181
182    /// Append [`NullBuffer`] to this [`NullBufferBuilder`]
183    ///
184    /// This is useful when you want to concatenate two null buffers.
185    pub fn append_buffer(&mut self, buffer: &NullBuffer) {
186        if buffer.null_count() > 0 {
187            self.materialize_if_needed();
188        }
189        if let Some(buf) = self.bitmap_builder.as_mut() {
190            buf.append_buffer(buffer.inner())
191        } else {
192            self.len += buffer.len();
193        }
194    }
195
196    /// Builds the null buffer and resets the builder.
197    /// Returns `None` if the builder only contains `true`s.
198    pub fn finish(&mut self) -> Option<NullBuffer> {
199        self.len = 0;
200        Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
201    }
202
203    /// Builds the [NullBuffer] without resetting the builder.
204    pub fn finish_cloned(&self) -> Option<NullBuffer> {
205        let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
206        Some(NullBuffer::new(buffer))
207    }
208
209    /// Returns the inner bitmap builder as slice
210    pub fn as_slice(&self) -> Option<&[u8]> {
211        Some(self.bitmap_builder.as_ref()?.as_slice())
212    }
213
214    fn materialize_if_needed(&mut self) {
215        if self.bitmap_builder.is_none() {
216            self.materialize()
217        }
218    }
219
220    #[cold]
221    fn materialize(&mut self) {
222        if self.bitmap_builder.is_none() {
223            let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
224            b.append_n(self.len, true);
225            self.bitmap_builder = Some(b);
226        }
227    }
228
229    /// Return a mutable reference to the inner bitmap slice.
230    pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
231        self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
232    }
233
234    /// Return the allocated size of this builder, in bytes, useful for memory accounting.
235    pub fn allocated_size(&self) -> usize {
236        self.bitmap_builder
237            .as_ref()
238            .map(|b| b.capacity() / 8)
239            .unwrap_or(0)
240    }
241}
242
243impl NullBufferBuilder {
244    /// Return the number of bits in the buffer.
245    pub fn len(&self) -> usize {
246        self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
247    }
248
249    /// Check if the builder is empty.
250    pub fn is_empty(&self) -> bool {
251        self.len() == 0
252    }
253}
254
255#[cfg(test)]
256mod tests {
257    use super::*;
258
259    #[test]
260    fn test_null_buffer_builder() {
261        let mut builder = NullBufferBuilder::new(0);
262        builder.append_null();
263        builder.append_non_null();
264        builder.append_n_nulls(2);
265        builder.append_n_non_nulls(2);
266        assert_eq!(6, builder.len());
267        assert_eq!(64, builder.allocated_size());
268
269        let buf = builder.finish().unwrap();
270        assert_eq!(&[0b110010_u8], buf.validity());
271    }
272
273    #[test]
274    fn test_null_buffer_builder_all_nulls() {
275        let mut builder = NullBufferBuilder::new(0);
276        builder.append_null();
277        builder.append_n_nulls(2);
278        builder.append_slice(&[false, false, false]);
279        assert_eq!(6, builder.len());
280        assert_eq!(64, builder.allocated_size());
281
282        let buf = builder.finish().unwrap();
283        assert_eq!(&[0b0_u8], buf.validity());
284    }
285
286    #[test]
287    fn test_null_buffer_builder_no_null() {
288        let mut builder = NullBufferBuilder::new(0);
289        builder.append_non_null();
290        builder.append_n_non_nulls(2);
291        builder.append_slice(&[true, true, true]);
292        assert_eq!(6, builder.len());
293        assert_eq!(0, builder.allocated_size());
294
295        let buf = builder.finish();
296        assert!(buf.is_none());
297    }
298
299    #[test]
300    fn test_null_buffer_builder_reset() {
301        let mut builder = NullBufferBuilder::new(0);
302        builder.append_slice(&[true, false, true]);
303        builder.finish();
304        assert!(builder.is_empty());
305
306        builder.append_slice(&[true, true, true]);
307        assert!(builder.finish().is_none());
308        assert!(builder.is_empty());
309
310        builder.append_slice(&[true, true, false, true]);
311
312        let buf = builder.finish().unwrap();
313        assert_eq!(&[0b1011_u8], buf.validity());
314    }
315
316    #[test]
317    fn test_null_buffer_builder_is_valid() {
318        let mut builder = NullBufferBuilder::new(0);
319        builder.append_n_non_nulls(6);
320        assert!(builder.is_valid(0));
321
322        builder.append_null();
323        assert!(!builder.is_valid(6));
324
325        builder.append_non_null();
326        assert!(builder.is_valid(7));
327    }
328
329    #[test]
330    fn test_null_buffer_builder_truncate() {
331        let mut builder = NullBufferBuilder::new(10);
332        builder.append_n_non_nulls(16);
333        assert_eq!(builder.as_slice(), None);
334        builder.truncate(20);
335        assert_eq!(builder.as_slice(), None);
336        assert_eq!(builder.len(), 16);
337        assert_eq!(builder.allocated_size(), 0);
338        builder.truncate(14);
339        assert_eq!(builder.as_slice(), None);
340        assert_eq!(builder.len(), 14);
341        builder.append_null();
342        builder.append_non_null();
343        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
344        assert_eq!(builder.allocated_size(), 64);
345    }
346
347    #[test]
348    fn test_null_buffer_builder_truncate_never_materialized() {
349        let mut builder = NullBufferBuilder::new(0);
350        assert_eq!(builder.len(), 0);
351        builder.append_n_nulls(2); // doesn't materialize
352        assert_eq!(builder.len(), 2);
353        builder.truncate(1);
354        assert_eq!(builder.len(), 1);
355    }
356
357    #[test]
358    fn test_append_buffers() {
359        let mut builder = NullBufferBuilder::new(0);
360        let buffer1 = NullBuffer::from(&[true, true]);
361        let buffer2 = NullBuffer::from(&[true, true, false]);
362
363        builder.append_buffer(&buffer1);
364        builder.append_buffer(&buffer2);
365
366        assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]);
367    }
368
369    #[test]
370    fn test_append_buffers_with_unaligned_length() {
371        let mut builder = NullBufferBuilder::new(0);
372        let buffer = NullBuffer::from(&[true, true, false, true, false]);
373        builder.append_buffer(&buffer);
374        assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]);
375
376        let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]);
377        builder.append_buffer(&buffer);
378        assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]);
379    }
380
381    #[test]
382    fn test_append_empty_buffer() {
383        let mut builder = NullBufferBuilder::new(0);
384        let buffer = NullBuffer::from(&[true, true, false, true]);
385        builder.append_buffer(&buffer);
386        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
387
388        let buffer = NullBuffer::from(&[]);
389        builder.append_buffer(&buffer);
390
391        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
392    }
393
394    #[test]
395    fn test_should_not_materialize_when_appending_all_valid_buffers() {
396        let mut builder = NullBufferBuilder::new(0);
397        let buffer = NullBuffer::from(&[true; 10]);
398        builder.append_buffer(&buffer);
399
400        let buffer = NullBuffer::from(&[true; 2]);
401        builder.append_buffer(&buffer);
402
403        assert_eq!(builder.finish(), None);
404    }
405}