arrow_buffer/buffer/
null.rs1use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
19use crate::buffer::BooleanBuffer;
20use crate::{Buffer, MutableBuffer};
21
22#[derive(Debug, Clone, Eq, PartialEq)]
34pub struct NullBuffer {
35 buffer: BooleanBuffer,
36 null_count: usize,
37}
38
39impl NullBuffer {
40 pub fn new(buffer: BooleanBuffer) -> Self {
42 let null_count = buffer.len() - buffer.count_set_bits();
43 Self { buffer, null_count }
44 }
45
46 pub fn new_null(len: usize) -> Self {
48 Self {
49 buffer: BooleanBuffer::new_unset(len),
50 null_count: len,
51 }
52 }
53
54 pub fn new_valid(len: usize) -> Self {
59 Self {
60 buffer: BooleanBuffer::new_set(len),
61 null_count: 0,
62 }
63 }
64
65 pub unsafe fn new_unchecked(buffer: BooleanBuffer, null_count: usize) -> Self {
71 Self { buffer, null_count }
72 }
73
74 pub fn union(lhs: Option<&NullBuffer>, rhs: Option<&NullBuffer>) -> Option<NullBuffer> {
80 match (lhs, rhs) {
81 (Some(lhs), Some(rhs)) => Some(Self::new(lhs.inner() & rhs.inner())),
82 (Some(n), None) | (None, Some(n)) => Some(n.clone()),
83 (None, None) => None,
84 }
85 }
86
87 pub fn contains(&self, other: &NullBuffer) -> bool {
89 if other.null_count == 0 {
90 return true;
91 }
92 let lhs = self.inner().bit_chunks().iter_padded();
93 let rhs = other.inner().bit_chunks().iter_padded();
94 lhs.zip(rhs).all(|(l, r)| (l & !r) == 0)
95 }
96
97 pub fn expand(&self, count: usize) -> Self {
101 let capacity = self.buffer.len().checked_mul(count).unwrap();
102 let mut buffer = MutableBuffer::new_null(capacity);
103
104 for i in 0..self.buffer.len() {
107 if self.is_null(i) {
108 continue;
109 }
110 for j in 0..count {
111 crate::bit_util::set_bit(buffer.as_mut(), i * count + j)
112 }
113 }
114 Self {
115 buffer: BooleanBuffer::new(buffer.into(), 0, capacity),
116 null_count: self.null_count * count,
117 }
118 }
119
120 #[inline]
122 pub fn len(&self) -> usize {
123 self.buffer.len()
124 }
125
126 #[inline]
128 pub fn offset(&self) -> usize {
129 self.buffer.offset()
130 }
131
132 #[inline]
134 pub fn is_empty(&self) -> bool {
135 self.buffer.is_empty()
136 }
137
138 pub fn shrink_to_fit(&mut self) {
140 self.buffer.shrink_to_fit();
141 }
142
143 #[inline]
145 pub fn null_count(&self) -> usize {
146 self.null_count
147 }
148
149 #[inline]
151 pub fn is_valid(&self, idx: usize) -> bool {
152 self.buffer.value(idx)
153 }
154
155 #[inline]
157 pub fn is_null(&self, idx: usize) -> bool {
158 !self.is_valid(idx)
159 }
160
161 #[inline]
163 pub fn validity(&self) -> &[u8] {
164 self.buffer.values()
165 }
166
167 pub fn slice(&self, offset: usize, len: usize) -> Self {
169 Self::new(self.buffer.slice(offset, len))
170 }
171
172 pub fn iter(&self) -> BitIterator<'_> {
179 self.buffer.iter()
180 }
181
182 pub fn valid_indices(&self) -> BitIndexIterator<'_> {
186 self.buffer.set_indices()
187 }
188
189 pub fn valid_slices(&self) -> BitSliceIterator<'_> {
193 self.buffer.set_slices()
194 }
195
196 #[inline]
198 pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
199 &self,
200 f: F,
201 ) -> Result<(), E> {
202 if self.null_count == self.len() {
203 return Ok(());
204 }
205 self.valid_indices().try_for_each(f)
206 }
207
208 #[inline]
210 pub fn inner(&self) -> &BooleanBuffer {
211 &self.buffer
212 }
213
214 #[inline]
216 pub fn into_inner(self) -> BooleanBuffer {
217 self.buffer
218 }
219
220 #[inline]
222 pub fn buffer(&self) -> &Buffer {
223 self.buffer.inner()
224 }
225
226 pub fn from_unsliced_buffer(buffer: impl Into<Buffer>, len: usize) -> Option<Self> {
230 let bb = BooleanBuffer::new(buffer.into(), 0, len);
231 let nb = NullBuffer::new(bb);
232 (nb.null_count() > 0).then_some(nb)
233 }
234
235 #[cfg(feature = "pool")]
237 pub fn claim(&self, pool: &dyn crate::MemoryPool) {
238 self.buffer.inner().claim(pool);
240 }
241}
242
243impl<'a> IntoIterator for &'a NullBuffer {
244 type Item = bool;
245 type IntoIter = BitIterator<'a>;
246
247 fn into_iter(self) -> Self::IntoIter {
248 self.buffer.iter()
249 }
250}
251
252impl From<BooleanBuffer> for NullBuffer {
253 fn from(value: BooleanBuffer) -> Self {
254 Self::new(value)
255 }
256}
257
258impl From<&[bool]> for NullBuffer {
259 fn from(value: &[bool]) -> Self {
260 BooleanBuffer::from(value).into()
261 }
262}
263
264impl<const N: usize> From<&[bool; N]> for NullBuffer {
265 fn from(value: &[bool; N]) -> Self {
266 value[..].into()
267 }
268}
269
270impl From<Vec<bool>> for NullBuffer {
271 fn from(value: Vec<bool>) -> Self {
272 BooleanBuffer::from(value).into()
273 }
274}
275
276impl FromIterator<bool> for NullBuffer {
277 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
278 BooleanBuffer::from_iter(iter).into()
279 }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// `Option<NullBuffer>` must not be larger than `NullBuffer` itself.
    #[test]
    fn test_size() {
        let bare = std::mem::size_of::<NullBuffer>();
        let wrapped = std::mem::size_of::<Option<NullBuffer>>();
        assert_eq!(bare, wrapped);
    }

    /// A bitmap with a mix of set and unset bits yields `Some` with the
    /// expected per-slot validity.
    #[test]
    fn test_from_unsliced_buffer_with_nulls() {
        let buf = Buffer::from([0b10110010u8]);
        let result = NullBuffer::from_unsliced_buffer(buf, 8);
        assert!(result.is_some());

        let nb = result.unwrap();
        assert_eq!(nb.len(), 8);
        assert_eq!(nb.null_count(), 4);

        // Bits are read least-significant first: 0b10110010 -> 0,1,0,0,1,1,0,1.
        let expected_valid = [false, true, false, false, true, true, false, true];
        for (idx, &valid) in expected_valid.iter().enumerate() {
            if valid {
                assert!(nb.is_valid(idx));
            } else {
                assert!(nb.is_null(idx));
            }
        }
    }

    /// A bitmap with every bit set contains no nulls, so nothing is returned.
    #[test]
    fn test_from_unsliced_buffer_all_valid() {
        let buf = Buffer::from([0b11111111u8]);
        let result = NullBuffer::from_unsliced_buffer(buf, 8);
        assert!(result.is_none());
    }

    /// A bitmap with no bit set yields a buffer whose slots are all null.
    #[test]
    fn test_from_unsliced_buffer_all_null() {
        let buf = Buffer::from([0b00000000u8]);
        let result = NullBuffer::from_unsliced_buffer(buf, 8);
        assert!(result.is_some());

        let nb = result.unwrap();
        assert_eq!(nb.len(), 8);
        assert_eq!(nb.null_count(), 8);
    }

    /// An empty bitmap has no nulls, so nothing is returned.
    #[test]
    fn test_from_unsliced_buffer_empty() {
        let buf = Buffer::from([]);
        let result = NullBuffer::from_unsliced_buffer(buf, 0);
        assert!(result.is_none());
    }
}