arrow_data/equal/list.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::data::{count_nulls, ArrayData};
19use arrow_buffer::ArrowNativeType;
20use num::Integer;
21
22use super::equal_range;
23
24fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
25 // invariant from `base_equal`
26 debug_assert_eq!(lhs.len(), rhs.len());
27
28 if lhs.is_empty() {
29 return true;
30 }
31
32 if lhs[0] == T::zero() && rhs[0] == T::zero() {
33 return lhs == rhs;
34 };
35
36 // The expensive case, e.g.
37 // [0, 2, 4, 6, 9] == [4, 6, 8, 10, 13]
38 lhs.windows(2)
39 .zip(rhs.windows(2))
40 .all(|(lhs_offsets, rhs_offsets)| {
41 // length of left == length of right
42 (lhs_offsets[1] - lhs_offsets[0]) == (rhs_offsets[1] - rhs_offsets[0])
43 })
44}
45
46pub(super) fn list_equal<T: ArrowNativeType + Integer>(
47 lhs: &ArrayData,
48 rhs: &ArrayData,
49 lhs_start: usize,
50 rhs_start: usize,
51 len: usize,
52) -> bool {
53 let lhs_offsets = lhs.buffer::<T>(0);
54 let rhs_offsets = rhs.buffer::<T>(0);
55
56 // There is an edge-case where a n-length list that has 0 children, results in panics.
57 // For example; an array with offsets [0, 0, 0, 0, 0] has 4 slots, but will have
58 // no valid children.
59 // Under logical equality, the child null bitmap will be an empty buffer, as there are
60 // no child values. This causes panics when trying to count set bits.
61 //
62 // We caught this by chance from an accidental test-case, but due to the nature of this
63 // crash only occurring on list equality checks, we are adding a check here, instead of
64 // on the buffer/bitmap utilities, as a length check would incur a penalty for almost all
65 // other use-cases.
66 //
67 // The solution is to check the number of child values from offsets, and return `true` if
68 // they = 0. Empty arrays are equal, so this is correct.
69 //
70 // It's unlikely that one would create a n-length list array with no values, where n > 0,
71 // however, one is more likely to slice into a list array and get a region that has 0
72 // child values.
73 // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot.
74 // For the edge case that zero length list arrays are always equal.
75 if len == 0 {
76 return true;
77 }
78
79 let lhs_child_length = lhs_offsets[lhs_start + len].to_usize().unwrap()
80 - lhs_offsets[lhs_start].to_usize().unwrap();
81
82 let rhs_child_length = rhs_offsets[rhs_start + len].to_usize().unwrap()
83 - rhs_offsets[rhs_start].to_usize().unwrap();
84
85 if lhs_child_length == 0 && lhs_child_length == rhs_child_length {
86 return true;
87 }
88
89 let lhs_values = &lhs.child_data()[0];
90 let rhs_values = &rhs.child_data()[0];
91
92 let lhs_null_count = count_nulls(lhs.nulls(), lhs_start, len);
93 let rhs_null_count = count_nulls(rhs.nulls(), rhs_start, len);
94
95 if lhs_null_count != rhs_null_count {
96 return false;
97 }
98
99 if lhs_null_count == 0 && rhs_null_count == 0 {
100 lhs_child_length == rhs_child_length
101 && lengths_equal(
102 &lhs_offsets[lhs_start..lhs_start + len],
103 &rhs_offsets[rhs_start..rhs_start + len],
104 )
105 && equal_range(
106 lhs_values,
107 rhs_values,
108 lhs_offsets[lhs_start].to_usize().unwrap(),
109 rhs_offsets[rhs_start].to_usize().unwrap(),
110 lhs_child_length,
111 )
112 } else {
113 // get a ref of the parent null buffer bytes, to use in testing for nullness
114 let lhs_nulls = lhs.nulls().unwrap();
115 let rhs_nulls = rhs.nulls().unwrap();
116
117 // with nulls, we need to compare item by item whenever it is not null
118 // TODO: Could potentially compare runs of not NULL values
119 (0..len).all(|i| {
120 let lhs_pos = lhs_start + i;
121 let rhs_pos = rhs_start + i;
122
123 let lhs_is_null = lhs_nulls.is_null(lhs_pos);
124 let rhs_is_null = rhs_nulls.is_null(rhs_pos);
125
126 if lhs_is_null != rhs_is_null {
127 return false;
128 }
129
130 let lhs_offset_start = lhs_offsets[lhs_pos].to_usize().unwrap();
131 let lhs_offset_end = lhs_offsets[lhs_pos + 1].to_usize().unwrap();
132 let rhs_offset_start = rhs_offsets[rhs_pos].to_usize().unwrap();
133 let rhs_offset_end = rhs_offsets[rhs_pos + 1].to_usize().unwrap();
134
135 let lhs_len = lhs_offset_end - lhs_offset_start;
136 let rhs_len = rhs_offset_end - rhs_offset_start;
137
138 lhs_is_null
139 || (lhs_len == rhs_len
140 && equal_range(
141 lhs_values,
142 rhs_values,
143 lhs_offset_start,
144 rhs_offset_start,
145 lhs_len,
146 ))
147 })
148 }
149}