Skip to main content

arrow_buffer/builder/
null.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
19
20/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls).
21///
22/// # See also
23/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder.
24/// * [`Self::allocated_size`] for the current memory allocated by the builder.
25///
26/// # Performance
27///
28/// This builder only materializes the buffer when null values (`false`) are
29/// appended. If you only append non-null, (`true`) to the builder, no buffer is
30/// allocated and [`build`](#method.build) or [`finish`](#method.finish) return
31/// `None`.
32///
33/// This optimization is **very** important for the performance as it avoids
34/// allocating memory for the null buffer when there are no nulls.
35///
36/// # Example
37/// ```
38/// # use arrow_buffer::NullBufferBuilder;
39/// let mut builder = NullBufferBuilder::new(8);
40/// builder.append_n_non_nulls(8);
41/// // If no non null values are appended, the null buffer is not created
42/// let buffer = builder.finish();
43/// assert!(buffer.is_none());
44/// // however, if a null value is appended, the null buffer is created
45/// let mut builder = NullBufferBuilder::new(8);
46/// builder.append_n_non_nulls(7);
47/// builder.append_null();
48/// let buffer = builder.finish().unwrap();
49/// assert_eq!(buffer.len(), 8);
50/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
51/// ```
52#[derive(Debug)]
53pub struct NullBufferBuilder {
54    /// The bitmap builder to store the null buffer:
55    /// * `Some` if any nulls have been appended ("materialized")
56    /// * `None` if no nulls have been appended.
57    bitmap_builder: Option<BooleanBufferBuilder>,
58    /// Length of the buffer before materializing.
59    ///
60    /// if `bitmap_buffer` buffer is `Some`, this value is not used.
61    len: usize,
62    /// Initial capacity of the `bitmap_builder`, when it is materialized.
63    capacity: usize,
64}
65
66impl NullBufferBuilder {
67    /// Creates a new empty builder.
68    ///
69    /// Note that this method does not allocate any memory, regardless of the
70    /// `capacity` parameter. If an allocation is required, `capacity` is the
71    /// size in bits (not bytes) that will be allocated at minimum.
72    pub fn new(capacity: usize) -> Self {
73        Self {
74            bitmap_builder: None,
75            len: 0,
76            capacity,
77        }
78    }
79
80    /// Creates a new builder with given length.
81    pub fn new_with_len(len: usize) -> Self {
82        Self {
83            bitmap_builder: None,
84            len,
85            capacity: len,
86        }
87    }
88
89    /// Creates a new builder from a `MutableBuffer`.
90    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
91        let capacity = buffer.len() * 8;
92        assert!(len <= capacity);
93
94        let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
95        Self {
96            bitmap_builder,
97            len,
98            capacity,
99        }
100    }
101
102    /// Appends `n` `true`s into the builder
103    /// to indicate that these `n` items are not nulls.
104    #[inline]
105    pub fn append_n_non_nulls(&mut self, n: usize) {
106        if let Some(buf) = self.bitmap_builder.as_mut() {
107            buf.append_n(n, true)
108        } else {
109            self.len += n;
110        }
111    }
112
113    /// Appends a `true` into the builder
114    /// to indicate that this item is not null.
115    #[inline]
116    pub fn append_non_null(&mut self) {
117        if let Some(buf) = self.bitmap_builder.as_mut() {
118            buf.append(true)
119        } else {
120            self.len += 1;
121        }
122    }
123
124    /// Appends `n` `false`s into the builder
125    /// to indicate that these `n` items are nulls.
126    #[inline]
127    pub fn append_n_nulls(&mut self, n: usize) {
128        self.materialize_if_needed();
129        self.bitmap_builder.as_mut().unwrap().append_n(n, false);
130    }
131
132    /// Appends a `false` into the builder
133    /// to indicate that this item is null.
134    #[inline]
135    pub fn append_null(&mut self) {
136        self.materialize_if_needed();
137        self.bitmap_builder.as_mut().unwrap().append(false);
138    }
139
140    /// Appends a boolean value into the builder.
141    #[inline]
142    pub fn append(&mut self, not_null: bool) {
143        if not_null {
144            self.append_non_null()
145        } else {
146            self.append_null()
147        }
148    }
149
150    /// Gets a bit in the buffer at `index`
151    #[inline]
152    pub fn is_valid(&self, index: usize) -> bool {
153        if let Some(ref buf) = self.bitmap_builder {
154            buf.get_bit(index)
155        } else {
156            true
157        }
158    }
159
160    /// Truncates the builder to the given length
161    ///
162    /// If `len` is greater than the buffer's current length, this has no effect
163    #[inline]
164    pub fn truncate(&mut self, len: usize) {
165        if let Some(buf) = self.bitmap_builder.as_mut() {
166            buf.truncate(len);
167        } else if len <= self.len {
168            self.len = len
169        }
170    }
171
172    /// Appends a boolean slice into the builder
173    /// to indicate the validations of these items.
174    pub fn append_slice(&mut self, slice: &[bool]) {
175        if slice.iter().any(|v| !v) {
176            self.materialize_if_needed()
177        }
178        if let Some(buf) = self.bitmap_builder.as_mut() {
179            buf.append_slice(slice)
180        } else {
181            self.len += slice.len();
182        }
183    }
184
185    /// Append [`NullBuffer`] to this [`NullBufferBuilder`]
186    ///
187    /// This is useful when you want to concatenate two null buffers.
188    pub fn append_buffer(&mut self, buffer: &NullBuffer) {
189        if buffer.null_count() > 0 {
190            self.materialize_if_needed();
191        }
192        if let Some(buf) = self.bitmap_builder.as_mut() {
193            buf.append_buffer(buffer.inner())
194        } else {
195            self.len += buffer.len();
196        }
197    }
198
199    /// Builds the [`NullBuffer`] and resets the builder.
200    ///
201    /// Returns `None` if the builder only contains `true`s. Use [`Self::build`]
202    /// when you don't need to reuse this builder.
203    pub fn finish(&mut self) -> Option<NullBuffer> {
204        self.len = 0;
205        Some(NullBuffer::new(self.bitmap_builder.take()?.build()))
206    }
207
208    /// Builds the [`NullBuffer`] without resetting the builder.
209    ///
210    /// This consumes the builder. Use [`Self::finish`] to reuse it.
211    pub fn build(self) -> Option<NullBuffer> {
212        self.bitmap_builder.map(NullBuffer::from)
213    }
214
215    /// Builds the [NullBuffer] without resetting the builder.
216    pub fn finish_cloned(&self) -> Option<NullBuffer> {
217        let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
218        Some(NullBuffer::new(buffer))
219    }
220
221    /// Returns the inner bitmap builder as slice
222    pub fn as_slice(&self) -> Option<&[u8]> {
223        Some(self.bitmap_builder.as_ref()?.as_slice())
224    }
225
226    fn materialize_if_needed(&mut self) {
227        if self.bitmap_builder.is_none() {
228            self.materialize()
229        }
230    }
231
232    #[cold]
233    fn materialize(&mut self) {
234        if self.bitmap_builder.is_none() {
235            let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
236            b.append_n(self.len, true);
237            self.bitmap_builder = Some(b);
238        }
239    }
240
241    /// Return a mutable reference to the inner bitmap slice.
242    pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
243        self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
244    }
245
246    /// Return the allocated size of this builder, in bytes, useful for memory accounting.
247    pub fn allocated_size(&self) -> usize {
248        self.bitmap_builder
249            .as_ref()
250            .map(|b| b.capacity() / 8)
251            .unwrap_or(0)
252    }
253
254    /// Return the number of bits in the buffer.
255    pub fn len(&self) -> usize {
256        self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
257    }
258
259    /// Check if the builder is empty.
260    pub fn is_empty(&self) -> bool {
261        self.len() == 0
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed nulls and non-nulls materialize the bitmap and produce the
    /// expected validity bits.
    #[test]
    fn test_null_buffer_builder() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_non_null();
        builder.append_n_nulls(2);
        builder.append_n_non_nulls(2);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b110010_u8], buf.validity());
    }

    /// Appending only nulls yields an all-zero bitmap.
    #[test]
    fn test_null_buffer_builder_all_nulls() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_n_nulls(2);
        builder.append_slice(&[false, false, false]);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b0_u8], buf.validity());
    }

    /// Appending only non-nulls never allocates and finishes to `None`.
    #[test]
    fn test_null_buffer_builder_no_null() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_non_null();
        builder.append_n_non_nulls(2);
        builder.append_slice(&[true, true, true]);
        assert_eq!(6, builder.len());
        assert_eq!(0, builder.allocated_size());

        let buf = builder.finish();
        assert!(buf.is_none());
    }

    /// `finish` resets the builder so it can be reused.
    #[test]
    fn test_null_buffer_builder_reset() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_slice(&[true, false, true]);
        builder.finish();
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, true]);
        assert!(builder.finish().is_none());
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, false, true]);

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b1011_u8], buf.validity());
    }

    /// `is_valid` works both before and after materialization.
    #[test]
    fn test_null_buffer_builder_is_valid() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_n_non_nulls(6);
        assert!(builder.is_valid(0));

        builder.append_null();
        assert!(!builder.is_valid(6));

        builder.append_non_null();
        assert!(builder.is_valid(7));
    }

    /// Truncating an unmaterialized builder only adjusts the tracked length,
    /// and a later null materializes with the truncated length.
    #[test]
    fn test_null_buffer_builder_truncate() {
        let mut builder = NullBufferBuilder::new(10);
        builder.append_n_non_nulls(16);
        assert_eq!(builder.as_slice(), None);
        builder.truncate(20);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 16);
        assert_eq!(builder.allocated_size(), 0);
        builder.truncate(14);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 14);
        builder.append_null();
        builder.append_non_null();
        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
        assert_eq!(builder.allocated_size(), 64);
    }

    /// Truncation on a builder that never materialized its bitmap.
    #[test]
    fn test_null_buffer_builder_truncate_never_materialized() {
        let mut builder = NullBufferBuilder::new(0);
        assert_eq!(builder.len(), 0);
        // Only non-nulls are appended so the bitmap stays unmaterialized;
        // appending nulls here would allocate and defeat the test's purpose.
        builder.append_n_non_nulls(2);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 2);
        builder.truncate(1);
        assert_eq!(builder.len(), 1);
        assert_eq!(builder.as_slice(), None);
    }

    /// Appending an all-valid buffer followed by one with a null.
    #[test]
    fn test_append_buffers() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer1 = NullBuffer::from(&[true, true]);
        let buffer2 = NullBuffer::from(&[true, true, false]);

        builder.append_buffer(&buffer1);
        builder.append_buffer(&buffer2);

        assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]);
    }

    /// Appended buffers need not end on a byte boundary.
    #[test]
    fn test_append_buffers_with_unaligned_length() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true, true, false, true, false]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]);

        let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]);
    }

    /// Appending an empty buffer leaves the bitmap unchanged.
    #[test]
    fn test_append_empty_buffer() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true, true, false, true]);
        builder.append_buffer(&buffer);
        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);

        let buffer = NullBuffer::from(&[]);
        builder.append_buffer(&buffer);

        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
    }

    /// All-valid buffers must not trigger materialization.
    #[test]
    fn test_should_not_materialize_when_appending_all_valid_buffers() {
        let mut builder = NullBufferBuilder::new(0);
        let buffer = NullBuffer::from(&[true; 10]);
        builder.append_buffer(&buffer);

        let buffer = NullBuffer::from(&[true; 2]);
        builder.append_buffer(&buffer);

        assert_eq!(builder.finish(), None);
    }
}
415}