1use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
21use arrow_buffer::buffer::bitwise_bin_op_helper;
22use arrow_buffer::{BooleanBuffer, NullBuffer, bitwise_unary_op_helper};
23use arrow_schema::{ArrowError, DataType};
24
25pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowError> {
45 let left_data = left.to_data();
46
47 if left_data.len() != right.len() {
48 return Err(ArrowError::ComputeError(
49 "Cannot perform comparison operation on arrays of different length".to_string(),
50 ));
51 }
52 let len = left_data.len();
53
54 if len == 0 || left_data.data_type() == &DataType::Null {
55 return Ok(make_array(left_data));
56 }
57
58 let right = match right.nulls() {
69 Some(nulls) => right.values() & nulls.inner(),
70 None => right.values().clone(),
71 };
72
73 let (combined, null_count) = match left_data.nulls() {
76 Some(left) => {
77 let mut valid_count = 0;
78 let b = bitwise_bin_op_helper(
79 left.buffer(),
80 left.offset(),
81 right.inner(),
82 right.offset(),
83 len,
84 |l, r| {
85 let t = l & !r;
86 valid_count += t.count_ones() as usize;
87 t
88 },
89 );
90 (b, len - valid_count)
91 }
92 None => {
93 let mut null_count = 0;
94 let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| {
95 let t = !b;
96 null_count += t.count_zeros() as usize;
97 t
98 });
99 (buffer, null_count)
100 }
101 };
102
103 let combined = BooleanBuffer::new(combined, 0, len);
104 let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };
107 let data = left_data.into_builder().nulls(Some(nulls));
108
109 Ok(make_array(unsafe { data.build_unchecked() }))
112}
113
114#[cfg(test)]
115mod tests {
116 use super::*;
117 use arrow_array::builder::{BooleanBuilder, Int32Builder, StructBuilder};
118 use arrow_array::cast::AsArray;
119 use arrow_array::types::Int32Type;
120 use arrow_array::{Int32Array, NullArray, StringArray, StructArray};
121 use arrow_data::ArrayData;
122 use arrow_schema::{Field, Fields};
123 use rand::prelude::StdRng;
124 use rand::{Rng, SeedableRng};
125
/// `nullif` over an input that already holds a null, using a mask that mixes
/// `true`, `false` and null entries.
#[test]
fn test_nullif_int_array() {
    let values = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
    let mask = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);

    let result = nullif(&values, &mask).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    let expected = Int32Array::from(vec![
        Some(15), // mask is false: value kept
        None,     // input was already null
        None,     // mask is true: slot nulled out
        Some(1),  // mask is false: value kept
        Some(9),  // mask is null: original value passes through
    ]);
    assert_eq!(&expected, actual);
}
145
/// A `NullArray` input comes back as an equal `NullArray`, for both an empty
/// and a non-empty mask.
#[test]
fn test_nullif_null_array() {
    let empty = nullif(&NullArray::new(0), &BooleanArray::new_null(0)).unwrap();
    assert_eq!(empty.as_ref(), &NullArray::new(0));

    let mask = BooleanArray::from(vec![Some(false), Some(true), None]);
    let masked = nullif(&NullArray::new(3), &mask).unwrap();
    assert_eq!(masked.as_ref(), &NullArray::new(3));
}
165
/// Both the values and the mask are sliced, so each carries a non-zero offset.
#[test]
fn test_nullif_int_array_offset() {
    // Sliced window: Some(15), Some(8), Some(1)
    let values = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]).slice(1, 3);

    // Sliced window: Some(false), None, Some(true)
    let mask = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
    ])
    .slice(2, 3);

    let result = nullif(&values, &mask).unwrap();

    let expected = Int32Array::from(vec![
        Some(15), // false: kept
        Some(8),  // null mask entry: kept
        None,     // true: nulled out
    ]);
    assert_eq!(&expected, result.as_primitive::<Int32Type>())
}
192
/// `nullif` on a string array, first on the full arrays and then on slices
/// taken at different offsets.
#[test]
fn test_nullif_string() {
    let strings = StringArray::from_iter([
        Some("hello"),
        None,
        Some("world"),
        Some("a"),
        Some("b"),
        None,
        None,
    ]);
    let mask = BooleanArray::from_iter([
        Some(true),
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        Some(false),
        None,
    ]);

    let full_result = nullif(&strings, &mask).unwrap();
    let full_rows: Vec<_> = full_result.as_string::<i32>().iter().collect();
    assert_eq!(
        full_rows,
        vec![None, None, Some("world"), None, Some("b"), None, None]
    );

    // Values window: "world", "a", "b"; mask window: true, false, true.
    let strings_window = strings.slice(2, 3);
    let mask_window = mask.slice(1, 3);
    let window_result = nullif(&strings_window, &mask_window).unwrap();
    let window_rows: Vec<_> = window_result.as_string::<i32>().iter().collect();
    assert_eq!(window_rows, vec![None, Some("a"), None]);
}
227
/// The values array is sliced at offset 17, i.e. past the first 16 slots.
#[test]
fn test_nullif_int_large_left_offset() {
    // Sixteen filler slots, followed by the interesting tail.
    let mut raw_values: Vec<Option<i32>> = vec![Some(-1); 16];
    raw_values.extend([None, Some(15), Some(8), Some(1), Some(9)]);
    // Sliced window: Some(15), Some(8), Some(1)
    let values = Int32Array::from(raw_values).slice(17, 3);

    // Sliced window: Some(false), None, Some(true)
    let mask = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
    ])
    .slice(2, 3);

    let result = nullif(&values, &mask).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![
        Some(15), // false: kept
        Some(8),  // null mask entry: kept
        None,     // true: nulled out
    ]);
    assert_eq!(&expected, actual)
}
276
/// The mask is sliced at offset 18, i.e. past the first 16 slots.
#[test]
fn test_nullif_int_large_right_offset() {
    // Sliced window: Some(15), Some(8), Some(1)
    let values = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]).slice(1, 3);

    // Nineteen `false` entries (indices 0..=18), followed by the interesting tail.
    let mut raw_mask: Vec<Option<bool>> = vec![Some(false); 19];
    raw_mask.extend([None, Some(true), Some(false), None]);
    // Sliced window: Some(false), None, Some(true)
    let mask = BooleanArray::from(raw_mask).slice(18, 3);

    let result = nullif(&values, &mask).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![
        Some(15), // false: kept
        Some(8),  // null mask entry: kept
        None,     // true: nulled out
    ]);
    assert_eq!(&expected, actual)
}
325
/// `nullif` where the values themselves are a sliced `BooleanArray`.
#[test]
fn test_nullif_boolean_offset() {
    // Sliced window: Some(true), Some(false), Some(true)
    let values = BooleanArray::from(vec![
        None,
        Some(true),
        Some(false),
        Some(true),
        Some(true),
    ])
    .slice(1, 3);

    // Sliced window: Some(false), None, Some(true)
    let mask = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
    ])
    .slice(2, 3);

    let result = nullif(&values, &mask).unwrap();
    let actual = result.as_any().downcast_ref::<BooleanArray>().unwrap();

    let expected = BooleanArray::from(vec![
        Some(true),  // false: kept
        Some(false), // null mask entry: kept
        None,        // true: nulled out
    ]);
    assert_eq!(&expected, actual)
}
358
/// One row of the two-field struct array assembled by `create_foo_struct`.
struct Foo {
    /// Value appended to the `Int32` child "a"; `None` appends a null.
    a: Option<i32>,
    /// Value appended to the `Boolean` child "b"; `None` appends a null.
    b: Option<bool>,
    /// Validity flag appended for the struct slot itself.
    is_valid: bool,
}

impl Foo {
    /// A valid row whose two children both hold values.
    fn new_valid(a: i32, b: bool) -> Foo {
        Foo {
            a: Some(a),
            b: Some(b),
            is_valid: true,
        }
    }

    /// A null row whose two children are null as well.
    fn new_null() -> Foo {
        Foo {
            a: None,
            b: None,
            is_valid: false,
        }
    }
}
383
384 fn create_foo_struct(values: Vec<Foo>) -> StructArray {
388 let mut struct_array = StructBuilder::new(
389 Fields::from(vec![
390 Field::new("a", DataType::Int32, true),
391 Field::new("b", DataType::Boolean, true),
392 ]),
393 vec![
394 Box::new(Int32Builder::with_capacity(values.len())),
395 Box::new(BooleanBuilder::with_capacity(values.len())),
396 ],
397 );
398
399 for value in values {
400 struct_array
401 .field_builder::<Int32Builder>(0)
402 .unwrap()
403 .append_option(value.a);
404 struct_array
405 .field_builder::<BooleanBuilder>(1)
406 .unwrap()
407 .append_option(value.b);
408 struct_array.append(value.is_valid);
409 }
410
411 struct_array.finish()
412 }
413
/// `nullif` on a sliced struct array: only the struct-level validity changes,
/// the child values of a nulled-out row are still present.
#[test]
fn test_nullif_struct_slices() {
    let all_rows = create_foo_struct(vec![
        Foo::new_valid(7, true),
        Foo::new_valid(15, false),
        Foo::new_valid(8, true),
        Foo::new_valid(12, false),
        Foo::new_null(),
        Foo::new_null(),
        Foo::new_valid(42, true),
    ]);
    // Sliced window: (15, false), (8, true), (12, false), null, null
    let rows = all_rows.slice(1, 5);

    // Sliced window: Some(false), None, Some(true), Some(false), None
    let mask = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        None,
        Some(true),
        Some(false),
        None,
    ])
    .slice(2, 5);

    let result = nullif(&rows, &mask).unwrap();
    let actual = result.as_any().downcast_ref::<StructArray>().unwrap();

    let expected = create_foo_struct(vec![
        // false: kept
        Foo::new_valid(15, false),
        // null mask entry: kept
        Foo::new_valid(8, true),
        // true: struct slot nulled out, children untouched
        Foo {
            a: Some(12),
            b: Some(false),
            is_valid: false,
        },
        // false: kept, but the row was already null
        Foo::new_null(),
        // null mask entry: kept, but the row was already null
        Foo::new_null(),
    ]);

    assert_eq!(&expected, actual);
}
462
/// `nullif` on an input in which every slot holds a value.
#[test]
fn test_nullif_no_nulls() {
    let values = Int32Array::from(vec![Some(15), Some(7), Some(8), Some(1), Some(9)]);
    let mask = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);

    let result = nullif(&values, &mask).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    // Only the slot whose mask entry is a valid `true` becomes null.
    let expected = Int32Array::from(vec![Some(15), Some(7), None, Some(1), Some(9)]);
    assert_eq!(actual, &expected);
}
473
/// Zero-length inputs yield a result equal to the (empty) input array.
#[test]
fn nullif_empty() {
    let values = Int32Array::from(ArrayData::new_empty(&DataType::Int32));
    let empty_mask = BooleanArray::from(ArrayData::new_empty(&DataType::Boolean));

    let result = nullif(&values, &empty_mask).unwrap();

    assert_eq!(result.as_ref(), &values);
}
481
482 fn test_nullif(values: &Int32Array, filter: &BooleanArray) {
483 let expected: Int32Array = values
484 .iter()
485 .zip(filter.iter())
486 .map(|(a, b)| match b {
487 Some(true) => None,
488 Some(false) | None => a,
489 })
490 .collect();
491
492 let r = nullif(values, filter).unwrap();
493 let r_data = r.to_data();
494 r_data.validate().unwrap();
495
496 assert_eq!(
497 r.as_ref(),
498 &expected,
499 "expected nulls: {:#?}\n\n\
500 result nulls: {:#?}\n\n\\
501 expected values: {:#?}\n\n\
502 result values: {:#?}",
503 expected.nulls(),
504 r.nulls(),
505 expected.values(),
506 r.as_primitive::<Int32Type>().values()
507 );
508 validate_nulls(expected.nulls());
509 validate_nulls(r.nulls());
510 }
511
512 fn validate_nulls(nulls: Option<&NullBuffer>) {
514 let Some(nulls) = nulls else {
515 return;
516 };
517 let mut actual_null_count = 0;
518 for i in 0..nulls.len() {
519 if nulls.is_null(i) {
520 actual_null_count += 1;
521 }
522 }
523 assert_eq!(actual_null_count, nulls.null_count());
524 }
525
/// Seeded randomized check of `nullif` over many slice offsets and lengths,
/// for masks with nulls, without a null buffer, and with an all-valid null
/// buffer.
#[test]
fn nullif_fuzz() {
    let mut rng = StdRng::seed_from_u64(7337);

    // 1024 zeros built from plain values.
    let plain = Int32Array::from(vec![0; 1024]);
    // 1024 slots, each null with probability 0.5.
    let sparse: Int32Array = (0..1024)
        .map(|_| rng.random_bool(0.5).then_some(1))
        .collect();

    // (offset, length) windows taken over each source array.
    let windows = [
        (0, 128),
        (0, 129),
        (64, 64),
        (0, 64),
        (32, 32),
        (0, 0),
        (32, 0),
        (5, 800),
        (33, 53),
        (77, 101),
    ];

    for source in [plain, sparse] {
        for (a_offset, a_length) in windows {
            let a = source.slice(a_offset, a_length);

            for bound in 1..=64 {
                // Random padding placed before and after the mask window.
                let lead = rng.random_range(0..bound);
                let trail = rng.random_range(0..bound);

                // Mask in which each slot is null with probability 0.5.
                let with_nulls: BooleanArray = (0..a_length + lead + trail)
                    .map(|_| {
                        if rng.random_bool(0.5) {
                            Some(rng.random_bool(0.5))
                        } else {
                            None
                        }
                    })
                    .collect();
                test_nullif(&a, &with_nulls.slice(lead, a_length));

                // Same mask with the null buffer dropped entirely.
                let no_null_buffer = remove_null_buffer(&with_nulls);
                test_nullif(&a, &no_null_buffer.slice(lead, a_length));

                // Same mask with a null buffer that marks every slot valid.
                let all_valid = remove_null_values(&no_null_buffer);
                test_nullif(&a, &all_valid.slice(lead, a_length));
            }
        }
    }
}
577
578 fn remove_null_buffer(array: &BooleanArray) -> BooleanArray {
580 make_array(
581 array
582 .into_data()
583 .into_builder()
584 .nulls(None)
585 .build()
586 .unwrap(),
587 )
588 .as_boolean()
589 .clone()
590 }
591
592 fn remove_null_values(array: &BooleanArray) -> BooleanArray {
594 let len = array.len();
595 let new_nulls = NullBuffer::from_iter(std::iter::repeat_n(true, len));
596 make_array(
597 array
598 .into_data()
599 .into_builder()
600 .nulls(Some(new_nulls))
601 .build()
602 .unwrap(),
603 )
604 .as_boolean()
605 .clone()
606 }
607}