arrow_select/
nullif.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Implements the `nullif` function for Arrow arrays.
19
20use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
21use arrow_buffer::buffer::bitwise_bin_op_helper;
22use arrow_buffer::{BooleanBuffer, NullBuffer, bitwise_unary_op_helper};
23use arrow_schema::{ArrowError, DataType};
24
25/// Returns a new array with the same values and the validity bit to false where
26/// the corresponding element of `right` is true.
27///
28/// This can be used to implement SQL `NULLIF`
29///
30/// # Example
31/// ```
32/// # use arrow_array::{Int32Array, BooleanArray};
33/// # use arrow_array::cast::AsArray;
34/// # use arrow_array::types::Int32Type;
35/// # use arrow_select::nullif::nullif;
36/// // input is [null, 8, 1, 9]
37/// let a = Int32Array::from(vec![None, Some(8), Some(1), Some(9)]);
38/// // use nullif to set index 1 to null
39/// let bool_array = BooleanArray::from(vec![Some(false), Some(true), Some(false), None]);
40/// let nulled = nullif(&a, &bool_array).unwrap();
41/// // The resulting array is [null, null, 1, 9]
42/// assert_eq!(nulled.as_primitive(), &Int32Array::from(vec![None, None, Some(1), Some(9)]));
43/// ```
44pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowError> {
45    let left_data = left.to_data();
46
47    if left_data.len() != right.len() {
48        return Err(ArrowError::ComputeError(
49            "Cannot perform comparison operation on arrays of different length".to_string(),
50        ));
51    }
52    let len = left_data.len();
53
54    if len == 0 || left_data.data_type() == &DataType::Null {
55        return Ok(make_array(left_data));
56    }
57
58    // left=0 (null)   right=null       output bitmap=null
59    // left=0          right=1          output bitmap=null
60    // left=1 (set)    right=null       output bitmap=set   (passthrough)
61    // left=1          right=1 & comp=true    output bitmap=null
62    // left=1          right=1 & comp=false   output bitmap=set
63    //
64    // Thus: result = left null bitmap & (!right_values | !right_bitmap)
65    //              OR left null bitmap & !(right_values & right_bitmap)
66
67    // Compute right_values & right_bitmap
68    let right = match right.nulls() {
69        Some(nulls) => right.values() & nulls.inner(),
70        None => right.values().clone(),
71    };
72
73    // Compute left null bitmap & !right
74
75    let (combined, null_count) = match left_data.nulls() {
76        Some(left) => {
77            let mut valid_count = 0;
78            let b = bitwise_bin_op_helper(
79                left.buffer(),
80                left.offset(),
81                right.inner(),
82                right.offset(),
83                len,
84                |l, r| {
85                    let t = l & !r;
86                    valid_count += t.count_ones() as usize;
87                    t
88                },
89            );
90            (b, len - valid_count)
91        }
92        None => {
93            let mut null_count = 0;
94            let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| {
95                let t = !b;
96                null_count += t.count_zeros() as usize;
97                t
98            });
99            (buffer, null_count)
100        }
101    };
102
103    let combined = BooleanBuffer::new(combined, 0, len);
104    // Safety:
105    // Counted nulls whilst computing
106    let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };
107    let data = left_data.into_builder().nulls(Some(nulls));
108
109    // SAFETY:
110    // Only altered null mask
111    Ok(make_array(unsafe { data.build_unchecked() }))
112}
113
114#[cfg(test)]
115mod tests {
116    use super::*;
117    use arrow_array::builder::{BooleanBuilder, Int32Builder, StructBuilder};
118    use arrow_array::cast::AsArray;
119    use arrow_array::types::Int32Type;
120    use arrow_array::{Int32Array, NullArray, StringArray, StructArray};
121    use arrow_data::ArrayData;
122    use arrow_schema::{Field, Fields};
123    use rand::prelude::StdRng;
124    use rand::{Rng, SeedableRng};
125
126    #[test]
127    fn test_nullif_int_array() {
128        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
129        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
130        let res = nullif(&a, &comp).unwrap();
131
132        let expected = Int32Array::from(vec![
133            Some(15),
134            None,
135            None, // comp true, slot 2 turned into null
136            Some(1),
137            // Even though comp array / right is null, should still pass through original value
138            // comp true, slot 2 turned into null
139            Some(9),
140        ]);
141
142        let res = res.as_primitive::<Int32Type>();
143        assert_eq!(&expected, res);
144    }
145
146    #[test]
147    fn test_nullif_null_array() {
148        assert_eq!(
149            nullif(&NullArray::new(0), &BooleanArray::new_null(0))
150                .unwrap()
151                .as_ref(),
152            &NullArray::new(0)
153        );
154
155        assert_eq!(
156            nullif(
157                &NullArray::new(3),
158                &BooleanArray::from(vec![Some(false), Some(true), None]),
159            )
160            .unwrap()
161            .as_ref(),
162            &NullArray::new(3)
163        );
164    }
165
166    #[test]
167    fn test_nullif_int_array_offset() {
168        let a = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]);
169        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
170        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
171        let comp = BooleanArray::from(vec![
172            Some(false),
173            Some(false),
174            Some(false),
175            None,
176            Some(true),
177            Some(false),
178            None,
179        ]);
180        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
181        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
182        let res = nullif(a, comp).unwrap();
183
184        let expected = Int32Array::from(vec![
185            Some(15), // False => keep it
186            Some(8),  // None => keep it
187            None,     // true => None
188        ]);
189        let res = res.as_primitive::<Int32Type>();
190        assert_eq!(&expected, res)
191    }
192
193    #[test]
194    fn test_nullif_string() {
195        let s = StringArray::from_iter([
196            Some("hello"),
197            None,
198            Some("world"),
199            Some("a"),
200            Some("b"),
201            None,
202            None,
203        ]);
204        let select = BooleanArray::from_iter([
205            Some(true),
206            Some(true),
207            Some(false),
208            Some(true),
209            Some(false),
210            Some(false),
211            None,
212        ]);
213
214        let a = nullif(&s, &select).unwrap();
215        let r: Vec<_> = a.as_string::<i32>().iter().collect();
216        assert_eq!(
217            r,
218            vec![None, None, Some("world"), None, Some("b"), None, None]
219        );
220
221        let s = s.slice(2, 3);
222        let select = select.slice(1, 3);
223        let a = nullif(&s, &select).unwrap();
224        let r: Vec<_> = a.as_string::<i32>().iter().collect();
225        assert_eq!(r, vec![None, Some("a"), None]);
226    }
227
228    #[test]
229    fn test_nullif_int_large_left_offset() {
230        let a = Int32Array::from(vec![
231            Some(-1), // 0
232            Some(-1),
233            Some(-1),
234            Some(-1),
235            Some(-1),
236            Some(-1),
237            Some(-1),
238            Some(-1),
239            Some(-1), // 8
240            Some(-1),
241            Some(-1),
242            Some(-1),
243            Some(-1),
244            Some(-1),
245            Some(-1),
246            Some(-1),
247            None,     // 16
248            Some(15), // 17
249            Some(8),
250            Some(1),
251            Some(9),
252        ]);
253        let a = a.slice(17, 3); // Some(15), Some(8), Some(1)
254
255        let comp = BooleanArray::from(vec![
256            Some(false),
257            Some(false),
258            Some(false),
259            None,
260            Some(true),
261            Some(false),
262            None,
263        ]);
264        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
265        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
266        let res = nullif(&a, comp).unwrap();
267        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
268
269        let expected = Int32Array::from(vec![
270            Some(15), // False => keep it
271            Some(8),  // None => keep it
272            None,     // true => None
273        ]);
274        assert_eq!(&expected, res)
275    }
276
277    #[test]
278    fn test_nullif_int_large_right_offset() {
279        let a = Int32Array::from(vec![
280            None,     // 0
281            Some(15), // 1
282            Some(8),
283            Some(1),
284            Some(9),
285        ]);
286        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
287
288        let comp = BooleanArray::from(vec![
289            Some(false), // 0
290            Some(false),
291            Some(false),
292            Some(false),
293            Some(false),
294            Some(false),
295            Some(false),
296            Some(false),
297            Some(false), // 8
298            Some(false),
299            Some(false),
300            Some(false),
301            Some(false),
302            Some(false),
303            Some(false),
304            Some(false),
305            Some(false), // 16
306            Some(false), // 17
307            Some(false), // 18
308            None,
309            Some(true),
310            Some(false),
311            None,
312        ]);
313        let comp = comp.slice(18, 3); // Some(false), None, Some(true)
314        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
315        let res = nullif(&a, comp).unwrap();
316        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
317
318        let expected = Int32Array::from(vec![
319            Some(15), // False => keep it
320            Some(8),  // None => keep it
321            None,     // true => None
322        ]);
323        assert_eq!(&expected, res)
324    }
325
326    #[test]
327    fn test_nullif_boolean_offset() {
328        let a = BooleanArray::from(vec![
329            None,       // 0
330            Some(true), // 1
331            Some(false),
332            Some(true),
333            Some(true),
334        ]);
335        let a = a.slice(1, 3); // Some(true), Some(false), Some(true)
336
337        let comp = BooleanArray::from(vec![
338            Some(false), // 0
339            Some(false), // 1
340            Some(false), // 2
341            None,
342            Some(true),
343            Some(false),
344            None,
345        ]);
346        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
347        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
348        let res = nullif(&a, comp).unwrap();
349        let res = res.as_any().downcast_ref::<BooleanArray>().unwrap();
350
351        let expected = BooleanArray::from(vec![
352            Some(true),  // False => keep it
353            Some(false), // None => keep it
354            None,        // true => None
355        ]);
356        assert_eq!(&expected, res)
357    }
358
    /// One row of the two-field struct array assembled by `create_foo_struct`.
    struct Foo {
        /// Value appended to the Int32 child named "a" (`None` appends a null).
        a: Option<i32>,
        /// Value appended to the Boolean child named "b" (`None` appends a null).
        b: Option<bool>,
        /// Whether the entry should be valid.
        is_valid: bool,
    }
365
366    impl Foo {
367        fn new_valid(a: i32, b: bool) -> Foo {
368            Self {
369                a: Some(a),
370                b: Some(b),
371                is_valid: true,
372            }
373        }
374
375        fn new_null() -> Foo {
376            Self {
377                a: None,
378                b: None,
379                is_valid: false,
380            }
381        }
382    }
383
384    /// Struct Array equality is a bit weird -- we need to have the *child values*
385    /// correct even if the enclosing struct indicates it is null. But we
386    /// also need the top level is_valid bits to be correct.
387    fn create_foo_struct(values: Vec<Foo>) -> StructArray {
388        let mut struct_array = StructBuilder::new(
389            Fields::from(vec![
390                Field::new("a", DataType::Int32, true),
391                Field::new("b", DataType::Boolean, true),
392            ]),
393            vec![
394                Box::new(Int32Builder::with_capacity(values.len())),
395                Box::new(BooleanBuilder::with_capacity(values.len())),
396            ],
397        );
398
399        for value in values {
400            struct_array
401                .field_builder::<Int32Builder>(0)
402                .unwrap()
403                .append_option(value.a);
404            struct_array
405                .field_builder::<BooleanBuilder>(1)
406                .unwrap()
407                .append_option(value.b);
408            struct_array.append(value.is_valid);
409        }
410
411        struct_array.finish()
412    }
413
414    #[test]
415    fn test_nullif_struct_slices() {
416        let struct_array = create_foo_struct(vec![
417            Foo::new_valid(7, true),
418            Foo::new_valid(15, false),
419            Foo::new_valid(8, true),
420            Foo::new_valid(12, false),
421            Foo::new_null(),
422            Foo::new_null(),
423            Foo::new_valid(42, true),
424        ]);
425
426        // Some({a: 15, b: false}), Some({a: 8, b: true}), Some({a: 12, b: false}),
427        // None, None
428        let struct_array = struct_array.slice(1, 5);
429        let comp = BooleanArray::from(vec![
430            Some(false), // 0
431            Some(false), // 1
432            Some(false), // 2
433            None,
434            Some(true),
435            Some(false),
436            None,
437        ]);
438        let comp = comp.slice(2, 5); // Some(false), None, Some(true), Some(false), None
439        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
440        let res = nullif(&struct_array, comp).unwrap();
441        let res = res.as_any().downcast_ref::<StructArray>().unwrap();
442
443        let expected = create_foo_struct(vec![
444            // Some(false) -> keep
445            Foo::new_valid(15, false),
446            // None -> keep
447            Foo::new_valid(8, true),
448            // Some(true) -> null out. But child values are still there.
449            Foo {
450                a: Some(12),
451                b: Some(false),
452                is_valid: false,
453            },
454            // Some(false) -> keep, but was null
455            Foo::new_null(),
456            // None -> keep, but was null
457            Foo::new_null(),
458        ]);
459
460        assert_eq!(&expected, res);
461    }
462
463    #[test]
464    fn test_nullif_no_nulls() {
465        let a = Int32Array::from(vec![Some(15), Some(7), Some(8), Some(1), Some(9)]);
466        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
467        let res = nullif(&a, &comp).unwrap();
468        let res = res.as_primitive::<Int32Type>();
469
470        let expected = Int32Array::from(vec![Some(15), Some(7), None, Some(1), Some(9)]);
471        assert_eq!(res, &expected);
472    }
473
474    #[test]
475    fn nullif_empty() {
476        let a = Int32Array::from(ArrayData::new_empty(&DataType::Int32));
477        let mask = BooleanArray::from(ArrayData::new_empty(&DataType::Boolean));
478        let res = nullif(&a, &mask).unwrap();
479        assert_eq!(res.as_ref(), &a);
480    }
481
482    fn test_nullif(values: &Int32Array, filter: &BooleanArray) {
483        let expected: Int32Array = values
484            .iter()
485            .zip(filter.iter())
486            .map(|(a, b)| match b {
487                Some(true) => None,
488                Some(false) | None => a,
489            })
490            .collect();
491
492        let r = nullif(values, filter).unwrap();
493        let r_data = r.to_data();
494        r_data.validate().unwrap();
495
496        assert_eq!(
497            r.as_ref(),
498            &expected,
499            "expected nulls: {:#?}\n\n\
500        result nulls:   {:#?}\n\n\\
501        expected values: {:#?}\n\n\
502        result values:   {:#?}",
503            expected.nulls(),
504            r.nulls(),
505            expected.values(),
506            r.as_primitive::<Int32Type>().values()
507        );
508        validate_nulls(expected.nulls());
509        validate_nulls(r.nulls());
510    }
511
512    /// Ensures that the null count matches the actual number of nulls.
513    fn validate_nulls(nulls: Option<&NullBuffer>) {
514        let Some(nulls) = nulls else {
515            return;
516        };
517        let mut actual_null_count = 0;
518        for i in 0..nulls.len() {
519            if nulls.is_null(i) {
520                actual_null_count += 1;
521            }
522        }
523        assert_eq!(actual_null_count, nulls.null_count());
524    }
525
526    #[test]
527    fn nullif_fuzz() {
528        let mut rng = StdRng::seed_from_u64(7337);
529
530        let arrays = [
531            Int32Array::from(vec![0; 1024]), // no nulls
532            (0..1024) // 50% nulls
533                .map(|_| rng.random_bool(0.5).then_some(1))
534                .collect(),
535        ];
536
537        for a in arrays {
538            let a_slices = [
539                (0, 128),
540                (0, 129),
541                (64, 64),
542                (0, 64),
543                (32, 32),
544                (0, 0),
545                (32, 0),
546                (5, 800),
547                (33, 53),
548                (77, 101),
549            ];
550            for (a_offset, a_length) in a_slices {
551                let a = a.slice(a_offset, a_length);
552
553                for i in 1..65 {
554                    let b_start_offset = rng.random_range(0..i);
555                    let b_end_offset = rng.random_range(0..i);
556
557                    // b with 50% nulls
558                    let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset)
559                        .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
560                        .collect();
561                    let b_sliced = b.slice(b_start_offset, a_length);
562                    test_nullif(&a, &b_sliced);
563
564                    // b with no nulls (and no null buffer)
565                    let b = remove_null_buffer(&b);
566                    let b_sliced = b.slice(b_start_offset, a_length);
567                    test_nullif(&a, &b_sliced);
568
569                    // b with no nulls (but with a null buffer)
570                    let b = remove_null_values(&b);
571                    let b_sliced = b.slice(b_start_offset, a_length);
572                    test_nullif(&a, &b_sliced);
573                }
574            }
575        }
576    }
577
578    /// Returns a new BooleanArray with no null buffer
579    fn remove_null_buffer(array: &BooleanArray) -> BooleanArray {
580        make_array(
581            array
582                .into_data()
583                .into_builder()
584                .nulls(None)
585                .build()
586                .unwrap(),
587        )
588        .as_boolean()
589        .clone()
590    }
591
592    /// Returns a new BooleanArray with a null buffer where all values are valid
593    fn remove_null_values(array: &BooleanArray) -> BooleanArray {
594        let len = array.len();
595        let new_nulls = NullBuffer::from_iter(std::iter::repeat_n(true, len));
596        make_array(
597            array
598                .into_data()
599                .into_builder()
600                .nulls(Some(new_nulls))
601                .build()
602                .unwrap(),
603        )
604        .as_boolean()
605        .clone()
606    }
607}