arrow_data/equal/primitive.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_iterator::BitSliceIterator;
19use crate::contains_nulls;
20use std::mem::size_of;
21
22use crate::data::ArrayData;
23
24use super::utils::equal_len;
25
/// Null-fraction cutoff consulted by `primitive_equal` once the compared range
/// is known to contain nulls.
///
/// The fraction is `null_count / len` of the left-hand array. At or above this
/// value the elements are compared one at a time; below it, the `(start, end)`
/// ranges yielded by `BitSliceIterator` over the validity bits are compared in
/// bulk.
26pub(crate) const NULL_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.4;
27
28pub(super) fn primitive_equal<T>(
29 lhs: &ArrayData,
30 rhs: &ArrayData,
31 lhs_start: usize,
32 rhs_start: usize,
33 len: usize,
34) -> bool {
35 let byte_width = size_of::<T>();
36 let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * byte_width..];
37 let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * byte_width..];
38
39 // Only checking one null mask here because by the time the control flow reaches
40 // this point, the equality of the two masks would have already been verified.
41 if !contains_nulls(lhs.nulls(), lhs_start, len) {
42 // without nulls, we just need to compare slices
43 equal_len(
44 lhs_values,
45 rhs_values,
46 lhs_start * byte_width,
47 rhs_start * byte_width,
48 len * byte_width,
49 )
50 } else {
51 let selectivity_frac = lhs.null_count() as f64 / lhs.len() as f64;
52
53 if selectivity_frac >= NULL_SLICES_SELECTIVITY_THRESHOLD {
54 // get a ref of the null buffer bytes, to use in testing for nullness
55 let lhs_nulls = lhs.nulls().unwrap();
56 let rhs_nulls = rhs.nulls().unwrap();
57 // with nulls, we need to compare item by item whenever it is not null
58 (0..len).all(|i| {
59 let lhs_pos = lhs_start + i;
60 let rhs_pos = rhs_start + i;
61 let lhs_is_null = lhs_nulls.is_null(lhs_pos);
62 let rhs_is_null = rhs_nulls.is_null(rhs_pos);
63
64 lhs_is_null
65 || (lhs_is_null == rhs_is_null)
66 && equal_len(
67 lhs_values,
68 rhs_values,
69 lhs_pos * byte_width,
70 rhs_pos * byte_width,
71 byte_width, // 1 * byte_width since we are comparing a single entry
72 )
73 })
74 } else {
75 let lhs_nulls = lhs.nulls().unwrap();
76 let lhs_slices_iter =
77 BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len);
78 let rhs_nulls = rhs.nulls().unwrap();
79 let rhs_slices_iter =
80 BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len);
81
82 lhs_slices_iter
83 .zip(rhs_slices_iter)
84 .all(|((l_start, l_end), (r_start, r_end))| {
85 l_start == r_start
86 && l_end == r_end
87 && equal_len(
88 lhs_values,
89 rhs_values,
90 (lhs_start + l_start) * byte_width,
91 (rhs_start + r_start) * byte_width,
92 (l_end - l_start) * byte_width,
93 )
94 })
95 }
96 }
97}