arrow_buffer/builder/
null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
19
20/// Builder for creating [`NullBuffer`]
21///
22/// # Performance
23///
24/// This builder only materializes the buffer when we append `false`.
25/// If you only append `true`s to the builder, what you get will be
26/// `None` when calling [`finish`](#method.finish).
27///
28/// This optimization is **very** important for the performance as it avoids
29/// allocating memory for the null buffer when there are no nulls.
30///
31/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
32///
33/// # Example
34/// ```
35/// # use arrow_buffer::NullBufferBuilder;
36/// let mut builder = NullBufferBuilder::new(8);
37/// builder.append_n_non_nulls(8);
38/// // If no non null values are appended, the null buffer is not created
39/// let buffer = builder.finish();
40/// assert!(buffer.is_none());
41/// // however, if a null value is appended, the null buffer is created
42/// let mut builder = NullBufferBuilder::new(8);
43/// builder.append_n_non_nulls(7);
44/// builder.append_null();
45/// let buffer = builder.finish().unwrap();
46/// assert_eq!(buffer.len(), 8);
47/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
48/// ```
49#[derive(Debug)]
50pub struct NullBufferBuilder {
51    /// The bitmap builder to store the null buffer:
52    /// * `Some` if any nulls have been appended ("materialized")
53    /// * `None` if no nulls have been appended.
54    bitmap_builder: Option<BooleanBufferBuilder>,
55    /// Length of the buffer before materializing.
56    ///
57    /// if `bitmap_buffer` buffer is `Some`, this value is not used.
58    len: usize,
59    /// Initial capacity of the `bitmap_builder`, when it is materialized.
60    capacity: usize,
61}
62
63impl NullBufferBuilder {
64    /// Creates a new empty builder.
65    ///
66    /// Note that this method does not allocate any memory, regardless of the
67    /// `capacity` parameter. If an allocation is required, `capacity` is the
68    /// size in bits (not bytes) that will be allocated at minimum.
69    pub fn new(capacity: usize) -> Self {
70        Self {
71            bitmap_builder: None,
72            len: 0,
73            capacity,
74        }
75    }
76
77    /// Creates a new builder with given length.
78    pub fn new_with_len(len: usize) -> Self {
79        Self {
80            bitmap_builder: None,
81            len,
82            capacity: len,
83        }
84    }
85
86    /// Creates a new builder from a `MutableBuffer`.
87    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
88        let capacity = buffer.len() * 8;
89        assert!(len <= capacity);
90
91        let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
92        Self {
93            bitmap_builder,
94            len,
95            capacity,
96        }
97    }
98
99    /// Appends `n` `true`s into the builder
100    /// to indicate that these `n` items are not nulls.
101    #[inline]
102    pub fn append_n_non_nulls(&mut self, n: usize) {
103        if let Some(buf) = self.bitmap_builder.as_mut() {
104            buf.append_n(n, true)
105        } else {
106            self.len += n;
107        }
108    }
109
110    /// Appends a `true` into the builder
111    /// to indicate that this item is not null.
112    #[inline]
113    pub fn append_non_null(&mut self) {
114        if let Some(buf) = self.bitmap_builder.as_mut() {
115            buf.append(true)
116        } else {
117            self.len += 1;
118        }
119    }
120
121    /// Appends `n` `false`s into the builder
122    /// to indicate that these `n` items are nulls.
123    #[inline]
124    pub fn append_n_nulls(&mut self, n: usize) {
125        self.materialize_if_needed();
126        self.bitmap_builder.as_mut().unwrap().append_n(n, false);
127    }
128
129    /// Appends a `false` into the builder
130    /// to indicate that this item is null.
131    #[inline]
132    pub fn append_null(&mut self) {
133        self.materialize_if_needed();
134        self.bitmap_builder.as_mut().unwrap().append(false);
135    }
136
137    /// Appends a boolean value into the builder.
138    #[inline]
139    pub fn append(&mut self, not_null: bool) {
140        if not_null {
141            self.append_non_null()
142        } else {
143            self.append_null()
144        }
145    }
146
147    /// Gets a bit in the buffer at `index`
148    #[inline]
149    pub fn is_valid(&self, index: usize) -> bool {
150        if let Some(ref buf) = self.bitmap_builder {
151            buf.get_bit(index)
152        } else {
153            true
154        }
155    }
156
157    /// Truncates the builder to the given length
158    ///
159    /// If `len` is greater than the buffer's current length, this has no effect
160    #[inline]
161    pub fn truncate(&mut self, len: usize) {
162        if let Some(buf) = self.bitmap_builder.as_mut() {
163            buf.truncate(len);
164        } else if len <= self.len {
165            self.len = len
166        }
167    }
168
169    /// Appends a boolean slice into the builder
170    /// to indicate the validations of these items.
171    pub fn append_slice(&mut self, slice: &[bool]) {
172        if slice.iter().any(|v| !v) {
173            self.materialize_if_needed()
174        }
175        if let Some(buf) = self.bitmap_builder.as_mut() {
176            buf.append_slice(slice)
177        } else {
178            self.len += slice.len();
179        }
180    }
181
182    /// Builds the null buffer and resets the builder.
183    /// Returns `None` if the builder only contains `true`s.
184    pub fn finish(&mut self) -> Option<NullBuffer> {
185        self.len = 0;
186        Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
187    }
188
189    /// Builds the [NullBuffer] without resetting the builder.
190    pub fn finish_cloned(&self) -> Option<NullBuffer> {
191        let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
192        Some(NullBuffer::new(buffer))
193    }
194
195    /// Returns the inner bitmap builder as slice
196    pub fn as_slice(&self) -> Option<&[u8]> {
197        Some(self.bitmap_builder.as_ref()?.as_slice())
198    }
199
200    fn materialize_if_needed(&mut self) {
201        if self.bitmap_builder.is_none() {
202            self.materialize()
203        }
204    }
205
206    #[cold]
207    fn materialize(&mut self) {
208        if self.bitmap_builder.is_none() {
209            let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
210            b.append_n(self.len, true);
211            self.bitmap_builder = Some(b);
212        }
213    }
214
215    /// Return a mutable reference to the inner bitmap slice.
216    pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
217        self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
218    }
219
220    /// Return the allocated size of this builder, in bytes, useful for memory accounting.
221    pub fn allocated_size(&self) -> usize {
222        self.bitmap_builder
223            .as_ref()
224            .map(|b| b.capacity() / 8)
225            .unwrap_or(0)
226    }
227}
228
229impl NullBufferBuilder {
230    /// Return the number of bits in the buffer.
231    pub fn len(&self) -> usize {
232        self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
233    }
234
235    /// Check if the builder is empty.
236    pub fn is_empty(&self) -> bool {
237        self.len() == 0
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed nulls and non-nulls end up in the bitmap in append order (LSB first).
    #[test]
    fn test_null_buffer_builder() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_non_null();
        builder.append_n_nulls(2);
        builder.append_n_non_nulls(2);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b110010_u8], buf.validity());
    }

    /// A builder that only ever saw nulls produces an all-zero bitmap.
    #[test]
    fn test_null_buffer_builder_all_nulls() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_null();
        builder.append_n_nulls(2);
        builder.append_slice(&[false, false, false]);
        assert_eq!(6, builder.len());
        assert_eq!(64, builder.allocated_size());

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b0_u8], buf.validity());
    }

    /// A builder that only ever saw non-nulls allocates nothing and finishes to `None`.
    #[test]
    fn test_null_buffer_builder_no_null() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_non_null();
        builder.append_n_non_nulls(2);
        builder.append_slice(&[true, true, true]);
        assert_eq!(6, builder.len());
        assert_eq!(0, builder.allocated_size());

        let buf = builder.finish();
        assert!(buf.is_none());
    }

    /// `finish` resets the builder so it can be reused for a fresh buffer.
    #[test]
    fn test_null_buffer_builder_reset() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_slice(&[true, false, true]);
        builder.finish();
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, true]);
        assert!(builder.finish().is_none());
        assert!(builder.is_empty());

        builder.append_slice(&[true, true, false, true]);

        let buf = builder.finish().unwrap();
        assert_eq!(&[0b1011_u8], buf.validity());
    }

    /// `is_valid` works both before and after the bitmap is materialized.
    #[test]
    fn test_null_buffer_builder_is_valid() {
        let mut builder = NullBufferBuilder::new(0);
        builder.append_n_non_nulls(6);
        assert!(builder.is_valid(0));

        builder.append_null();
        assert!(!builder.is_valid(6));

        builder.append_non_null();
        assert!(builder.is_valid(7));
    }

    /// Truncating an unmaterialized builder only adjusts its length, and the
    /// truncated length is what gets back-filled on materialization.
    #[test]
    fn test_null_buffer_builder_truncate() {
        let mut builder = NullBufferBuilder::new(10);
        builder.append_n_non_nulls(16);
        assert_eq!(builder.as_slice(), None);
        builder.truncate(20);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 16);
        assert_eq!(builder.allocated_size(), 0);
        builder.truncate(14);
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 14);
        builder.append_null();
        builder.append_non_null();
        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
        assert_eq!(builder.allocated_size(), 64);
    }

    /// Truncation on a builder that never materializes its bitmap.
    #[test]
    fn test_null_buffer_builder_truncate_never_materialized() {
        let mut builder = NullBufferBuilder::new(0);
        assert_eq!(builder.len(), 0);
        builder.append_n_non_nulls(2); // doesn't materialize
        assert_eq!(builder.as_slice(), None);
        assert_eq!(builder.len(), 2);
        builder.truncate(1);
        assert_eq!(builder.len(), 1);
        // Truncating must not materialize the bitmap either.
        assert_eq!(builder.as_slice(), None);
        assert!(builder.finish().is_none());
    }

    /// Truncation on a builder whose bitmap has already been materialized.
    #[test]
    fn test_null_buffer_builder_truncate_materialized() {
        let mut builder = NullBufferBuilder::new(0);
        assert_eq!(builder.len(), 0);
        builder.append_n_nulls(2); // materializes the bitmap
        assert!(builder.as_slice().is_some());
        assert_eq!(builder.len(), 2);
        builder.truncate(1);
        assert_eq!(builder.len(), 1);
        assert!(builder.as_slice().is_some());
    }
}