1use crate::array::print_long_array;
19use crate::builder::BooleanBuilder;
20use crate::iterator::BooleanIter;
21use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::DataType;
25use std::any::Any;
26use std::sync::Arc;
27
/// An array of optional boolean values.
///
/// The values are stored bit-packed (one bit per element) in `values`; an
/// optional `nulls` buffer records which elements are valid.
#[derive(Clone)]
pub struct BooleanArray {
    /// Bit-packed boolean values, one bit per element.
    values: BooleanBuffer,
    /// Validity information; `None` when the array carries no null buffer.
    nulls: Option<NullBuffer>,
}
72
73impl std::fmt::Debug for BooleanArray {
74 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75 write!(f, "BooleanArray\n[\n")?;
76 print_long_array(self, f, |array, index, f| {
77 std::fmt::Debug::fmt(&array.value(index), f)
78 })?;
79 write!(f, "]")
80 }
81}
82
83impl BooleanArray {
84 pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
90 if let Some(n) = nulls.as_ref() {
91 assert_eq!(values.len(), n.len());
92 }
93 Self { values, nulls }
94 }
95
96 pub fn new_null(len: usize) -> Self {
98 Self {
99 values: BooleanBuffer::new_unset(len),
100 nulls: Some(NullBuffer::new_null(len)),
101 }
102 }
103
104 pub fn new_scalar(value: bool) -> Scalar<Self> {
106 let values = match value {
107 true => BooleanBuffer::new_set(1),
108 false => BooleanBuffer::new_unset(1),
109 };
110 Scalar::new(Self::new(values, None))
111 }
112
113 pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self {
119 BooleanBuffer::new(buffer.into(), offset, len).into()
120 }
121
122 pub fn new_from_u8(value: &[u8]) -> Self {
128 BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into()
129 }
130
    /// Returns the number of elements in this array (the length of the
    /// values buffer).
    pub fn len(&self) -> usize {
        self.values.len()
    }
135
    /// Returns whether the values buffer reports itself as empty.
    pub fn is_empty(&self) -> bool {
        self.values.is_empty()
    }
140
141 pub fn slice(&self, offset: usize, length: usize) -> Self {
143 Self {
144 values: self.values.slice(offset, length),
145 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
146 }
147 }
148
    /// Returns a [`BooleanBuilder`] created with the given `capacity`.
    pub fn builder(capacity: usize) -> BooleanBuilder {
        BooleanBuilder::with_capacity(capacity)
    }
153
    /// Returns a reference to the underlying bit-packed values buffer.
    ///
    /// The buffer holds a bit for every element, including null ones.
    pub fn values(&self) -> &BooleanBuffer {
        &self.values
    }
158
159 pub fn true_count(&self) -> usize {
161 match self.nulls() {
162 Some(nulls) => {
163 let null_chunks = nulls.inner().bit_chunks().iter_padded();
164 let value_chunks = self.values().bit_chunks().iter_padded();
165 null_chunks
166 .zip(value_chunks)
167 .map(|(a, b)| (a & b).count_ones() as usize)
168 .sum()
169 }
170 None => self.values().count_set_bits(),
171 }
172 }
173
174 pub fn false_count(&self) -> usize {
176 self.len() - self.null_count() - self.true_count()
177 }
178
    /// Returns the boolean value at index `i` without bounds checking.
    ///
    /// The validity of the element is not consulted; only the values buffer
    /// is read.
    ///
    /// # Safety
    ///
    /// The caller must ensure `i < self.len()`; the checked counterpart
    /// [`Self::value`] asserts exactly that before delegating here.
    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
        // SAFETY: forwarded to the caller's contract (`i` is in bounds).
        unsafe { self.values.value_unchecked(i) }
    }
189
190 pub fn value(&self, i: usize) -> bool {
198 assert!(
199 i < self.len(),
200 "Trying to access an element at index {} from a BooleanArray of length {}",
201 i,
202 self.len()
203 );
204 unsafe { self.value_unchecked(i) }
207 }
208
209 pub fn take_iter<'a>(
211 &'a self,
212 indexes: impl Iterator<Item = Option<usize>> + 'a,
213 ) -> impl Iterator<Item = Option<bool>> + 'a {
214 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
215 }
216
217 pub unsafe fn take_iter_unchecked<'a>(
222 &'a self,
223 indexes: impl Iterator<Item = Option<usize>> + 'a,
224 ) -> impl Iterator<Item = Option<bool>> + 'a {
225 indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
226 }
227
228 pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
239 where
240 F: FnMut(T::Item) -> bool,
241 {
242 let nulls = left.logical_nulls();
243 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
244 op(left.value_unchecked(i))
246 });
247 Self::new(values, nulls)
248 }
249
250 pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
267 where
268 F: FnMut(T::Item, S::Item) -> bool,
269 {
270 assert_eq!(left.len(), right.len());
271
272 let nulls = NullBuffer::union(
273 left.logical_nulls().as_ref(),
274 right.logical_nulls().as_ref(),
275 );
276 let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
277 op(left.value_unchecked(i), right.value_unchecked(i))
279 });
280 Self::new(values, nulls)
281 }
282
283 pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
285 (self.values, self.nulls)
286 }
287}
288
// Opts `BooleanArray` into the crate-private `Sealed` marker trait.
// NOTE(review): presumably required so that `Array` can be implemented below;
// confirm against the definition in `super::private`.
impl super::private::Sealed for BooleanArray {}
290
291impl Array for BooleanArray {
292 fn as_any(&self) -> &dyn Any {
293 self
294 }
295
296 fn to_data(&self) -> ArrayData {
297 self.clone().into()
298 }
299
300 fn into_data(self) -> ArrayData {
301 self.into()
302 }
303
304 fn data_type(&self) -> &DataType {
305 &DataType::Boolean
306 }
307
308 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
309 Arc::new(self.slice(offset, length))
310 }
311
312 fn len(&self) -> usize {
313 self.values.len()
314 }
315
316 fn is_empty(&self) -> bool {
317 self.values.is_empty()
318 }
319
320 fn shrink_to_fit(&mut self) {
321 self.values.shrink_to_fit();
322 if let Some(nulls) = &mut self.nulls {
323 nulls.shrink_to_fit();
324 }
325 }
326
327 fn offset(&self) -> usize {
328 self.values.offset()
329 }
330
331 fn nulls(&self) -> Option<&NullBuffer> {
332 self.nulls.as_ref()
333 }
334
335 fn logical_null_count(&self) -> usize {
336 self.null_count()
337 }
338
339 fn get_buffer_memory_size(&self) -> usize {
340 let mut sum = self.values.inner().capacity();
341 if let Some(x) = &self.nulls {
342 sum += x.buffer().capacity()
343 }
344 sum
345 }
346
347 fn get_array_memory_size(&self) -> usize {
348 std::mem::size_of::<Self>() + self.get_buffer_memory_size()
349 }
350}
351
352impl ArrayAccessor for &BooleanArray {
353 type Item = bool;
354
355 fn value(&self, index: usize) -> Self::Item {
356 BooleanArray::value(self, index)
357 }
358
359 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
360 unsafe { BooleanArray::value_unchecked(self, index) }
361 }
362}
363
364impl From<Vec<bool>> for BooleanArray {
365 fn from(data: Vec<bool>) -> Self {
366 let mut mut_buf = MutableBuffer::new_null(data.len());
367 {
368 let mut_slice = mut_buf.as_slice_mut();
369 for (i, b) in data.iter().enumerate() {
370 if *b {
371 bit_util::set_bit(mut_slice, i);
372 }
373 }
374 }
375 let array_data = ArrayData::builder(DataType::Boolean)
376 .len(data.len())
377 .add_buffer(mut_buf.into());
378
379 let array_data = unsafe { array_data.build_unchecked() };
380 BooleanArray::from(array_data)
381 }
382}
383
384impl From<Vec<Option<bool>>> for BooleanArray {
385 fn from(data: Vec<Option<bool>>) -> Self {
386 data.iter().collect()
387 }
388}
389
390impl From<ArrayData> for BooleanArray {
391 fn from(data: ArrayData) -> Self {
392 let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
393 assert_eq!(
394 data_type,
395 DataType::Boolean,
396 "BooleanArray expected ArrayData with type Boolean got {data_type:?}",
397 );
398 assert_eq!(
399 buffers.len(),
400 1,
401 "BooleanArray data should contain a single buffer only (values buffer)"
402 );
403 let buffer = buffers.pop().expect("checked above");
404 let values = BooleanBuffer::new(buffer, offset, len);
405
406 Self { values, nulls }
407 }
408}
409
410impl From<BooleanArray> for ArrayData {
411 fn from(array: BooleanArray) -> Self {
412 let builder = ArrayDataBuilder::new(DataType::Boolean)
413 .len(array.values.len())
414 .offset(array.values.offset())
415 .nulls(array.nulls)
416 .buffers(vec![array.values.into_inner()]);
417
418 unsafe { builder.build_unchecked() }
419 }
420}
421
422impl<'a> IntoIterator for &'a BooleanArray {
423 type Item = Option<bool>;
424 type IntoIter = BooleanIter<'a>;
425
426 fn into_iter(self) -> Self::IntoIter {
427 BooleanIter::<'a>::new(self)
428 }
429}
430
431impl<'a> BooleanArray {
432 pub fn iter(&'a self) -> BooleanIter<'a> {
434 BooleanIter::<'a>::new(self)
435 }
436}
437
/// An optional boolean value used as a conversion target when collecting an
/// iterator into a [`BooleanArray`].
///
/// The `FromIterator` implementation accepts any item type that implements
/// `Into<BooleanAdapter>`, so providing such a conversion for a type lets
/// iterators of that type be collected into a [`BooleanArray`].
#[derive(Debug)]
struct BooleanAdapter {
    /// The wrapped value; `None` represents a null element.
    pub native: Option<bool>,
}
451
452impl From<bool> for BooleanAdapter {
453 fn from(value: bool) -> Self {
454 BooleanAdapter {
455 native: Some(value),
456 }
457 }
458}
459
460impl From<&bool> for BooleanAdapter {
461 fn from(value: &bool) -> Self {
462 BooleanAdapter {
463 native: Some(*value),
464 }
465 }
466}
467
468impl From<Option<bool>> for BooleanAdapter {
469 fn from(value: Option<bool>) -> Self {
470 BooleanAdapter { native: value }
471 }
472}
473
474impl From<&Option<bool>> for BooleanAdapter {
475 fn from(value: &Option<bool>) -> Self {
476 BooleanAdapter { native: *value }
477 }
478}
479
480impl<Ptr: Into<BooleanAdapter>> FromIterator<Ptr> for BooleanArray {
481 fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
482 let iter = iter.into_iter();
483 let capacity = match iter.size_hint() {
484 (lower, Some(upper)) if lower == upper => lower,
485 _ => 0,
486 };
487 let mut builder = BooleanBuilder::with_capacity(capacity);
488 builder.extend(iter.map(|item| item.into().native));
489 builder.finish()
490 }
491}
492
493impl BooleanArray {
494 #[inline]
506 #[allow(
507 private_bounds,
508 reason = "We will expose BooleanAdapter if there is a need"
509 )]
510 pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
511 where
512 P: Into<BooleanAdapter>,
513 I: ExactSizeIterator<Item = P>,
514 {
515 let data_len = iter.len();
516
517 let num_bytes = bit_util::ceil(data_len, 8);
518 let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
519 let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
520
521 let data = val_builder.as_slice_mut();
522
523 let null_slice = null_builder.as_slice_mut();
524 iter.enumerate().for_each(|(i, item)| {
525 if let Some(a) = item.into().native {
526 unsafe {
527 bit_util::set_bit_raw(null_slice.as_mut_ptr(), i);
530 if a {
531 bit_util::set_bit_raw(data.as_mut_ptr(), i);
532 }
533 }
534 }
535 });
536
537 let data = unsafe {
538 ArrayData::new_unchecked(
539 DataType::Boolean,
540 data_len,
541 None,
542 Some(null_builder.into()),
543 0,
544 vec![val_builder.into()],
545 vec![],
546 )
547 };
548 BooleanArray::from(data)
549 }
550}
551
552impl From<BooleanBuffer> for BooleanArray {
553 fn from(values: BooleanBuffer) -> Self {
554 Self {
555 values,
556 nulls: None,
557 }
558 }
559}
560
561#[cfg(test)]
562mod tests {
563 use super::*;
564 use arrow_buffer::Buffer;
565 use rand::{Rng, rng};
566
567 #[test]
568 fn test_boolean_fmt_debug() {
569 let arr = BooleanArray::from(vec![true, false, false]);
570 assert_eq!(
571 "BooleanArray\n[\n true,\n false,\n false,\n]",
572 format!("{arr:?}")
573 );
574 }
575
576 #[test]
577 fn test_boolean_with_null_fmt_debug() {
578 let mut builder = BooleanArray::builder(3);
579 builder.append_value(true);
580 builder.append_null();
581 builder.append_value(false);
582 let arr = builder.finish();
583 assert_eq!(
584 "BooleanArray\n[\n true,\n null,\n false,\n]",
585 format!("{arr:?}")
586 );
587 }
588
589 #[test]
590 fn test_boolean_array_from_vec() {
591 let buf = Buffer::from([10_u8]);
592 let arr = BooleanArray::from(vec![false, true, false, true]);
593 assert_eq!(&buf, arr.values().inner());
594 assert_eq!(4, arr.len());
595 assert_eq!(0, arr.offset());
596 assert_eq!(0, arr.null_count());
597 for i in 0..4 {
598 assert!(!arr.is_null(i));
599 assert!(arr.is_valid(i));
600 assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
601 }
602 }
603
604 #[test]
605 fn test_boolean_array_from_vec_option() {
606 let buf = Buffer::from([10_u8]);
607 let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
608 assert_eq!(&buf, arr.values().inner());
609 assert_eq!(4, arr.len());
610 assert_eq!(0, arr.offset());
611 assert_eq!(1, arr.null_count());
612 for i in 0..4 {
613 if i == 2 {
614 assert!(arr.is_null(i));
615 assert!(!arr.is_valid(i));
616 } else {
617 assert!(!arr.is_null(i));
618 assert!(arr.is_valid(i));
619 assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
620 }
621 }
622 }
623
624 #[test]
625 fn test_boolean_array_from_packed() {
626 let v = [1_u8, 2_u8, 3_u8];
627 let arr = BooleanArray::new_from_packed(v, 0, 24);
628 assert_eq!(24, arr.len());
629 assert_eq!(0, arr.offset());
630 assert_eq!(0, arr.null_count());
631 assert!(arr.nulls.is_none());
632 for i in 0..24 {
633 assert!(!arr.is_null(i));
634 assert!(arr.is_valid(i));
635 assert_eq!(
636 i == 0 || i == 9 || i == 16 || i == 17,
637 arr.value(i),
638 "failed t {i}"
639 )
640 }
641 }
642
643 #[test]
644 fn test_boolean_array_from_slice_u8() {
645 let v: Vec<u8> = vec![1, 2, 3];
646 let slice = &v[..];
647 let arr = BooleanArray::new_from_u8(slice);
648 assert_eq!(24, arr.len());
649 assert_eq!(0, arr.offset());
650 assert_eq!(0, arr.null_count());
651 assert!(arr.nulls().is_none());
652 for i in 0..24 {
653 assert!(!arr.is_null(i));
654 assert!(arr.is_valid(i));
655 assert_eq!(
656 i == 0 || i == 9 || i == 16 || i == 17,
657 arr.value(i),
658 "failed t {i}"
659 )
660 }
661 }
662
663 #[test]
664 fn test_boolean_array_from_iter() {
665 let v = vec![Some(false), Some(true), Some(false), Some(true)];
666 let arr = v.into_iter().collect::<BooleanArray>();
667 assert_eq!(4, arr.len());
668 assert_eq!(0, arr.offset());
669 assert_eq!(0, arr.null_count());
670 assert!(arr.nulls().is_none());
671 for i in 0..3 {
672 assert!(!arr.is_null(i));
673 assert!(arr.is_valid(i));
674 assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}")
675 }
676 }
677
678 #[test]
679 fn test_boolean_array_from_non_nullable_iter() {
680 let v = vec![true, false, true];
681 let arr = v.into_iter().collect::<BooleanArray>();
682 assert_eq!(3, arr.len());
683 assert_eq!(0, arr.offset());
684 assert_eq!(0, arr.null_count());
685 assert!(arr.nulls().is_none());
686
687 assert!(arr.value(0));
688 assert!(!arr.value(1));
689 assert!(arr.value(2));
690 }
691
692 #[test]
693 fn test_boolean_array_from_nullable_iter() {
694 let v = vec![Some(true), None, Some(false), None];
695 let arr = v.into_iter().collect::<BooleanArray>();
696 assert_eq!(4, arr.len());
697 assert_eq!(0, arr.offset());
698 assert_eq!(2, arr.null_count());
699 assert!(arr.nulls().is_some());
700
701 assert!(arr.is_valid(0));
702 assert!(arr.is_null(1));
703 assert!(arr.is_valid(2));
704 assert!(arr.is_null(3));
705
706 assert!(arr.value(0));
707 assert!(!arr.value(2));
708 }
709
710 #[test]
711 fn test_boolean_array_from_nullable_trusted_len_iter() {
712 let v = vec![Some(true), None, Some(false), None];
714 let expected = v.clone().into_iter().collect::<BooleanArray>();
715 let actual = unsafe {
716 BooleanArray::from_trusted_len_iter(v.into_iter())
718 };
719 assert_eq!(expected, actual);
720 }
721
722 #[test]
723 fn test_boolean_array_from_iter_with_larger_upper_bound() {
724 let iterator = vec![Some(true), None, Some(false), None]
727 .into_iter()
728 .filter(Option::is_some);
729 let arr = iterator.collect::<BooleanArray>();
730 assert_eq!(2, arr.len());
731 }
732
733 #[test]
734 fn test_boolean_array_builder() {
735 let buf = Buffer::from([27_u8]);
738 let buf2 = buf.clone();
739 let data = ArrayData::builder(DataType::Boolean)
740 .len(5)
741 .offset(2)
742 .add_buffer(buf)
743 .build()
744 .unwrap();
745 let arr = BooleanArray::from(data);
746 assert_eq!(&buf2, arr.values().inner());
747 assert_eq!(5, arr.len());
748 assert_eq!(2, arr.offset());
749 assert_eq!(0, arr.null_count());
750 for i in 0..3 {
751 assert_eq!(i != 0, arr.value(i), "failed at {i}");
752 }
753 }
754
755 #[test]
756 #[should_panic(
757 expected = "Trying to access an element at index 4 from a BooleanArray of length 3"
758 )]
759 fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
760 let v = vec![Some(true), None, Some(false)];
761 let array = v.into_iter().collect::<BooleanArray>();
762
763 array.value(4);
764 }
765
766 #[test]
767 #[should_panic(expected = "BooleanArray data should contain a single buffer only \
768 (values buffer)")]
769 #[cfg(not(feature = "force_validate"))]
772 fn test_boolean_array_invalid_buffer_len() {
773 let data = unsafe {
774 ArrayData::builder(DataType::Boolean)
775 .len(5)
776 .build_unchecked()
777 };
778 drop(BooleanArray::from(data));
779 }
780
781 #[test]
782 #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
783 fn test_from_array_data_validation() {
784 let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
785 }
786
787 #[test]
788 #[cfg_attr(miri, ignore)] fn test_true_false_count() {
790 let mut rng = rng();
791
792 for _ in 0..10 {
793 let d: Vec<_> = (0..2000).map(|_| rng.random_bool(0.5)).collect();
795 let b = BooleanArray::from(d.clone());
796
797 let expected_true = d.iter().filter(|x| **x).count();
798 assert_eq!(b.true_count(), expected_true);
799 assert_eq!(b.false_count(), d.len() - expected_true);
800
801 let d: Vec<_> = (0..2000)
803 .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
804 .collect();
805 let b = BooleanArray::from(d.clone());
806
807 let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count();
808 assert_eq!(b.true_count(), expected_true);
809
810 let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count();
811 assert_eq!(b.false_count(), expected_false);
812 }
813 }
814
815 #[test]
816 fn test_into_parts() {
817 let boolean_array = [Some(true), None, Some(false)]
818 .into_iter()
819 .collect::<BooleanArray>();
820 let (values, nulls) = boolean_array.into_parts();
821 assert_eq!(values.values(), &[0b0000_0001]);
822 assert!(nulls.is_some());
823 assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]);
824
825 let boolean_array =
826 BooleanArray::from(vec![false, false, false, false, false, false, false, true]);
827 let (values, nulls) = boolean_array.into_parts();
828 assert_eq!(values.values(), &[0b1000_0000]);
829 assert!(nulls.is_none());
830 }
831
832 #[test]
833 fn test_new_null_array() {
834 let arr = BooleanArray::new_null(5);
835
836 assert_eq!(arr.len(), 5);
837 assert_eq!(arr.null_count(), 5);
838 assert_eq!(arr.true_count(), 0);
839 assert_eq!(arr.false_count(), 0);
840
841 for i in 0..5 {
842 assert!(arr.is_null(i));
843 assert!(!arr.is_valid(i));
844 }
845 }
846
847 #[test]
848 fn test_slice_with_nulls() {
849 let arr = BooleanArray::from(vec![Some(true), None, Some(false)]);
850 let sliced = arr.slice(1, 2);
851
852 assert_eq!(sliced.len(), 2);
853 assert_eq!(sliced.null_count(), 1);
854
855 assert!(sliced.is_null(0));
856 assert!(sliced.is_valid(1));
857 assert!(!sliced.value(1));
858 }
859}