arrow_select/
nullif.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Implements the `nullif` function for Arrow arrays.
19
20use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
21use arrow_buffer::buffer::bitwise_bin_op_helper;
22use arrow_buffer::{BooleanBuffer, NullBuffer};
23use arrow_schema::{ArrowError, DataType};
24
25/// Returns a new array with the same values and the validity bit to false where
26/// the corresponding element of`right` is true.
27///
28/// This can be used to implement SQL `NULLIF`
29///
30/// # Example
31/// ```
32/// # use arrow_array::{Int32Array, BooleanArray};
33/// # use arrow_array::cast::AsArray;
34/// # use arrow_array::types::Int32Type;
35/// # use arrow_select::nullif::nullif;
36/// // input is [null, 8, 1, 9]
37/// let a = Int32Array::from(vec![None, Some(8), Some(1), Some(9)]);
38/// // use nullif to set index 1 to null
39/// let bool_array = BooleanArray::from(vec![Some(false), Some(true), Some(false), None]);
40/// let nulled = nullif(&a, &bool_array).unwrap();
41/// // The resulting array is [null, null, 1, 9]
42/// assert_eq!(nulled.as_primitive(), &Int32Array::from(vec![None, None, Some(1), Some(9)]));
43/// ```
44pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowError> {
45    let left_data = left.to_data();
46
47    if left_data.len() != right.len() {
48        return Err(ArrowError::ComputeError(
49            "Cannot perform comparison operation on arrays of different length".to_string(),
50        ));
51    }
52    let len = left_data.len();
53
54    if len == 0 || left_data.data_type() == &DataType::Null {
55        return Ok(make_array(left_data));
56    }
57
58    // left=0 (null)   right=null       output bitmap=null
59    // left=0          right=1          output bitmap=null
60    // left=1 (set)    right=null       output bitmap=set   (passthrough)
61    // left=1          right=1 & comp=true    output bitmap=null
62    // left=1          right=1 & comp=false   output bitmap=set
63    //
64    // Thus: result = left null bitmap & (!right_values | !right_bitmap)
65    //              OR left null bitmap & !(right_values & right_bitmap)
66
67    // Compute right_values & right_bitmap
68    let right = match right.nulls() {
69        Some(nulls) => right.values() & nulls.inner(),
70        None => right.values().clone(),
71    };
72
73    // Compute left null bitmap & !right
74
75    let (combined, null_count) = match left_data.nulls() {
76        Some(left) => {
77            let mut valid_count = 0;
78            let b = bitwise_bin_op_helper(
79                left.buffer(),
80                left.offset(),
81                right.inner(),
82                right.offset(),
83                len,
84                |l, r| {
85                    let t = l & !r;
86                    valid_count += t.count_ones() as usize;
87                    t
88                },
89            );
90            (b, len - valid_count)
91        }
92        None => {
93            let mut null_count = 0;
94            let buffer =
95                BooleanBuffer::from_bitwise_unary_op(right.inner(), right.offset(), len, |b| {
96                    let t = !b;
97                    null_count += t.count_zeros() as usize;
98                    t
99                })
100                .into_inner();
101            (buffer, null_count)
102        }
103    };
104
105    let combined = BooleanBuffer::new(combined, 0, len);
106    // Safety:
107    // Counted nulls whilst computing
108    let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };
109    let data = left_data.into_builder().nulls(Some(nulls));
110
111    // SAFETY:
112    // Only altered null mask
113    Ok(make_array(unsafe { data.build_unchecked() }))
114}
115
116#[cfg(test)]
117mod tests {
118    use super::*;
119    use arrow_array::builder::{BooleanBuilder, Int32Builder, StructBuilder};
120    use arrow_array::cast::AsArray;
121    use arrow_array::types::Int32Type;
122    use arrow_array::{Int32Array, NullArray, StringArray, StructArray};
123    use arrow_data::ArrayData;
124    use arrow_schema::{Field, Fields};
125    use rand::{Rng, rng};
126
127    #[test]
128    fn test_nullif_int_array() {
129        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
130        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
131        let res = nullif(&a, &comp).unwrap();
132
133        let expected = Int32Array::from(vec![
134            Some(15),
135            None,
136            None, // comp true, slot 2 turned into null
137            Some(1),
138            // Even though comp array / right is null, should still pass through original value
139            // comp true, slot 2 turned into null
140            Some(9),
141        ]);
142
143        let res = res.as_primitive::<Int32Type>();
144        assert_eq!(&expected, res);
145    }
146
147    #[test]
148    fn test_nullif_null_array() {
149        assert_eq!(
150            nullif(&NullArray::new(0), &BooleanArray::new_null(0))
151                .unwrap()
152                .as_ref(),
153            &NullArray::new(0)
154        );
155
156        assert_eq!(
157            nullif(
158                &NullArray::new(3),
159                &BooleanArray::from(vec![Some(false), Some(true), None]),
160            )
161            .unwrap()
162            .as_ref(),
163            &NullArray::new(3)
164        );
165    }
166
167    #[test]
168    fn test_nullif_int_array_offset() {
169        let a = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]);
170        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
171        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
172        let comp = BooleanArray::from(vec![
173            Some(false),
174            Some(false),
175            Some(false),
176            None,
177            Some(true),
178            Some(false),
179            None,
180        ]);
181        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
182        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
183        let res = nullif(a, comp).unwrap();
184
185        let expected = Int32Array::from(vec![
186            Some(15), // False => keep it
187            Some(8),  // None => keep it
188            None,     // true => None
189        ]);
190        let res = res.as_primitive::<Int32Type>();
191        assert_eq!(&expected, res)
192    }
193
194    #[test]
195    fn test_nullif_string() {
196        let s = StringArray::from_iter([
197            Some("hello"),
198            None,
199            Some("world"),
200            Some("a"),
201            Some("b"),
202            None,
203            None,
204        ]);
205        let select = BooleanArray::from_iter([
206            Some(true),
207            Some(true),
208            Some(false),
209            Some(true),
210            Some(false),
211            Some(false),
212            None,
213        ]);
214
215        let a = nullif(&s, &select).unwrap();
216        let r: Vec<_> = a.as_string::<i32>().iter().collect();
217        assert_eq!(
218            r,
219            vec![None, None, Some("world"), None, Some("b"), None, None]
220        );
221
222        let s = s.slice(2, 3);
223        let select = select.slice(1, 3);
224        let a = nullif(&s, &select).unwrap();
225        let r: Vec<_> = a.as_string::<i32>().iter().collect();
226        assert_eq!(r, vec![None, Some("a"), None]);
227    }
228
229    #[test]
230    fn test_nullif_int_large_left_offset() {
231        let a = Int32Array::from(vec![
232            Some(-1), // 0
233            Some(-1),
234            Some(-1),
235            Some(-1),
236            Some(-1),
237            Some(-1),
238            Some(-1),
239            Some(-1),
240            Some(-1), // 8
241            Some(-1),
242            Some(-1),
243            Some(-1),
244            Some(-1),
245            Some(-1),
246            Some(-1),
247            Some(-1),
248            None,     // 16
249            Some(15), // 17
250            Some(8),
251            Some(1),
252            Some(9),
253        ]);
254        let a = a.slice(17, 3); // Some(15), Some(8), Some(1)
255
256        let comp = BooleanArray::from(vec![
257            Some(false),
258            Some(false),
259            Some(false),
260            None,
261            Some(true),
262            Some(false),
263            None,
264        ]);
265        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
266        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
267        let res = nullif(&a, comp).unwrap();
268        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
269
270        let expected = Int32Array::from(vec![
271            Some(15), // False => keep it
272            Some(8),  // None => keep it
273            None,     // true => None
274        ]);
275        assert_eq!(&expected, res)
276    }
277
278    #[test]
279    fn test_nullif_int_large_right_offset() {
280        let a = Int32Array::from(vec![
281            None,     // 0
282            Some(15), // 1
283            Some(8),
284            Some(1),
285            Some(9),
286        ]);
287        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
288
289        let comp = BooleanArray::from(vec![
290            Some(false), // 0
291            Some(false),
292            Some(false),
293            Some(false),
294            Some(false),
295            Some(false),
296            Some(false),
297            Some(false),
298            Some(false), // 8
299            Some(false),
300            Some(false),
301            Some(false),
302            Some(false),
303            Some(false),
304            Some(false),
305            Some(false),
306            Some(false), // 16
307            Some(false), // 17
308            Some(false), // 18
309            None,
310            Some(true),
311            Some(false),
312            None,
313        ]);
314        let comp = comp.slice(18, 3); // Some(false), None, Some(true)
315        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
316        let res = nullif(&a, comp).unwrap();
317        let res = res.as_any().downcast_ref::<Int32Array>().unwrap();
318
319        let expected = Int32Array::from(vec![
320            Some(15), // False => keep it
321            Some(8),  // None => keep it
322            None,     // true => None
323        ]);
324        assert_eq!(&expected, res)
325    }
326
327    #[test]
328    fn test_nullif_boolean_offset() {
329        let a = BooleanArray::from(vec![
330            None,       // 0
331            Some(true), // 1
332            Some(false),
333            Some(true),
334            Some(true),
335        ]);
336        let a = a.slice(1, 3); // Some(true), Some(false), Some(true)
337
338        let comp = BooleanArray::from(vec![
339            Some(false), // 0
340            Some(false), // 1
341            Some(false), // 2
342            None,
343            Some(true),
344            Some(false),
345            None,
346        ]);
347        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
348        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
349        let res = nullif(&a, comp).unwrap();
350        let res = res.as_any().downcast_ref::<BooleanArray>().unwrap();
351
352        let expected = BooleanArray::from(vec![
353            Some(true),  // False => keep it
354            Some(false), // None => keep it
355            None,        // true => None
356        ]);
357        assert_eq!(&expected, res)
358    }
359
360    struct Foo {
361        a: Option<i32>,
362        b: Option<bool>,
363        /// Whether the entry should be valid.
364        is_valid: bool,
365    }
366
367    impl Foo {
368        fn new_valid(a: i32, b: bool) -> Foo {
369            Self {
370                a: Some(a),
371                b: Some(b),
372                is_valid: true,
373            }
374        }
375
376        fn new_null() -> Foo {
377            Self {
378                a: None,
379                b: None,
380                is_valid: false,
381            }
382        }
383    }
384
385    /// Struct Array equality is a bit weird -- we need to have the *child values*
386    /// correct even if the enclosing struct indicates it is null. But we
387    /// also need the top level is_valid bits to be correct.
388    fn create_foo_struct(values: Vec<Foo>) -> StructArray {
389        let mut struct_array = StructBuilder::new(
390            Fields::from(vec![
391                Field::new("a", DataType::Int32, true),
392                Field::new("b", DataType::Boolean, true),
393            ]),
394            vec![
395                Box::new(Int32Builder::with_capacity(values.len())),
396                Box::new(BooleanBuilder::with_capacity(values.len())),
397            ],
398        );
399
400        for value in values {
401            struct_array
402                .field_builder::<Int32Builder>(0)
403                .unwrap()
404                .append_option(value.a);
405            struct_array
406                .field_builder::<BooleanBuilder>(1)
407                .unwrap()
408                .append_option(value.b);
409            struct_array.append(value.is_valid);
410        }
411
412        struct_array.finish()
413    }
414
415    #[test]
416    fn test_nullif_struct_slices() {
417        let struct_array = create_foo_struct(vec![
418            Foo::new_valid(7, true),
419            Foo::new_valid(15, false),
420            Foo::new_valid(8, true),
421            Foo::new_valid(12, false),
422            Foo::new_null(),
423            Foo::new_null(),
424            Foo::new_valid(42, true),
425        ]);
426
427        // Some({a: 15, b: false}), Some({a: 8, b: true}), Some({a: 12, b: false}),
428        // None, None
429        let struct_array = struct_array.slice(1, 5);
430        let comp = BooleanArray::from(vec![
431            Some(false), // 0
432            Some(false), // 1
433            Some(false), // 2
434            None,
435            Some(true),
436            Some(false),
437            None,
438        ]);
439        let comp = comp.slice(2, 5); // Some(false), None, Some(true), Some(false), None
440        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
441        let res = nullif(&struct_array, comp).unwrap();
442        let res = res.as_any().downcast_ref::<StructArray>().unwrap();
443
444        let expected = create_foo_struct(vec![
445            // Some(false) -> keep
446            Foo::new_valid(15, false),
447            // None -> keep
448            Foo::new_valid(8, true),
449            // Some(true) -> null out. But child values are still there.
450            Foo {
451                a: Some(12),
452                b: Some(false),
453                is_valid: false,
454            },
455            // Some(false) -> keep, but was null
456            Foo::new_null(),
457            // None -> keep, but was null
458            Foo::new_null(),
459        ]);
460
461        assert_eq!(&expected, res);
462    }
463
464    #[test]
465    fn test_nullif_no_nulls() {
466        let a = Int32Array::from(vec![Some(15), Some(7), Some(8), Some(1), Some(9)]);
467        let comp = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
468        let res = nullif(&a, &comp).unwrap();
469        let res = res.as_primitive::<Int32Type>();
470
471        let expected = Int32Array::from(vec![Some(15), Some(7), None, Some(1), Some(9)]);
472        assert_eq!(res, &expected);
473    }
474
475    #[test]
476    fn nullif_empty() {
477        let a = Int32Array::from(ArrayData::new_empty(&DataType::Int32));
478        let mask = BooleanArray::from(ArrayData::new_empty(&DataType::Boolean));
479        let res = nullif(&a, &mask).unwrap();
480        assert_eq!(res.as_ref(), &a);
481    }
482
483    fn test_nullif(values: &Int32Array, filter: &BooleanArray) {
484        let expected: Int32Array = values
485            .iter()
486            .zip(filter.iter())
487            .map(|(a, b)| match b {
488                Some(true) => None,
489                Some(false) | None => a,
490            })
491            .collect();
492
493        let r = nullif(values, filter).unwrap();
494        let r_data = r.to_data();
495        r_data.validate().unwrap();
496
497        assert_eq!(r.as_ref(), &expected);
498    }
499
500    #[test]
501    fn nullif_fuzz() {
502        let mut rng = rng();
503
504        let arrays = [
505            Int32Array::from(vec![0; 128]),
506            (0..128)
507                .map(|_| rng.random_bool(0.5).then_some(0))
508                .collect(),
509        ];
510
511        for a in arrays {
512            let a_slices = [(0, 128), (64, 64), (0, 64), (32, 32), (0, 0), (32, 0)];
513
514            for (a_offset, a_length) in a_slices {
515                let a = a.slice(a_offset, a_length);
516
517                for i in 1..65 {
518                    let b_start_offset = rng.random_range(0..i);
519                    let b_end_offset = rng.random_range(0..i);
520
521                    let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset)
522                        .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
523                        .collect();
524                    let b = b.slice(b_start_offset, a_length);
525
526                    test_nullif(&a, &b);
527                }
528            }
529        }
530    }
531}