Skip to main content

arrow_buffer/buffer/
ops.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use super::{Buffer, MutableBuffer};
19use crate::BooleanBuffer;
20use crate::util::bit_util::ceil;
21
22/// Apply a bitwise operation `op` to four inputs and return the result as a Buffer.
23///
24/// The inputs are treated as bitmaps, meaning that offsets and length are
25/// specified in number of bits.
26///
27/// NOTE: The operation `op` is applied to chunks of 64 bits (u64) and any bits
28/// outside the offsets and len are set to zero out before calling `op`.
29pub fn bitwise_quaternary_op_helper<F>(
30    buffers: [&Buffer; 4],
31    offsets: [usize; 4],
32    len_in_bits: usize,
33    op: F,
34) -> Buffer
35where
36    F: Fn(u64, u64, u64, u64) -> u64,
37{
38    let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits);
39    let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits);
40    let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits);
41    let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits);
42
43    let chunks = first_chunks
44        .iter()
45        .zip(second_chunks.iter())
46        .zip(third_chunks.iter())
47        .zip(fourth_chunks.iter())
48        .map(|(((first, second), third), fourth)| op(first, second, third, fourth));
49    // Soundness: `BitChunks` is a `BitChunks` iterator which
50    // correctly reports its upper bound
51    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
52
53    let remainder_bytes = ceil(first_chunks.remainder_len(), 8);
54    let rem = op(
55        first_chunks.remainder_bits(),
56        second_chunks.remainder_bits(),
57        third_chunks.remainder_bits(),
58        fourth_chunks.remainder_bits(),
59    );
60    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
61    let rem = &rem.to_le_bytes()[0..remainder_bytes];
62    buffer.extend_from_slice(rem);
63
64    buffer.into()
65}
66
67/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
68///
69/// The inputs are treated as bitmaps, meaning that offsets and length are
70/// specified in number of bits.
71///
72/// NOTE: The operation `op` is applied to chunks of 64 bits (u64) and any bits
73/// outside the offsets and len are set to zero out before calling `op`.
74pub fn bitwise_bin_op_helper<F>(
75    left: &Buffer,
76    left_offset_in_bits: usize,
77    right: &Buffer,
78    right_offset_in_bits: usize,
79    len_in_bits: usize,
80    mut op: F,
81) -> Buffer
82where
83    F: FnMut(u64, u64) -> u64,
84{
85    let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
86    let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);
87
88    let chunks = left_chunks
89        .iter()
90        .zip(right_chunks.iter())
91        .map(|(left, right)| op(left, right));
92    // Soundness: `BitChunks` is a `BitChunks` iterator which
93    // correctly reports its upper bound
94    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
95
96    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
97    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
98    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
99    let rem = &rem.to_le_bytes()[0..remainder_bytes];
100    buffer.extend_from_slice(rem);
101
102    buffer.into()
103}
104
105/// Apply a bitwise operation `op` to one input and return the result as a Buffer.
106///
107/// The input is treated as a bitmap, meaning that offset and length are
108/// specified in number of bits.
109///
110/// NOTE: The operation `op` is applied to chunks of 64 bits (u64) and any bits
111/// outside the offsets and len are set to zero out before calling `op`.
112pub fn bitwise_unary_op_helper<F>(
113    left: &Buffer,
114    offset_in_bits: usize,
115    len_in_bits: usize,
116    mut op: F,
117) -> Buffer
118where
119    F: FnMut(u64) -> u64,
120{
121    // reserve capacity and set length so we can get a typed view of u64 chunks
122    let mut result =
123        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
124
125    let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
126
127    let result_chunks = result.typed_data_mut::<u64>().iter_mut();
128
129    result_chunks
130        .zip(left_chunks.iter())
131        .for_each(|(res, left)| {
132            *res = op(left);
133        });
134
135    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
136    let rem = op(left_chunks.remainder_bits());
137    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
138    let rem = &rem.to_le_bytes()[0..remainder_bytes];
139    result.extend_from_slice(rem);
140
141    result.into()
142}
143
144/// Apply a bitwise and to two inputs and return the result as a Buffer.
145/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
146///
147/// # See Also
148/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly
149pub fn buffer_bin_and(
150    left: &Buffer,
151    left_offset_in_bits: usize,
152    right: &Buffer,
153    right_offset_in_bits: usize,
154    len_in_bits: usize,
155) -> Buffer {
156    let result = BooleanBuffer::from_bitwise_binary_op(
157        left,
158        left_offset_in_bits,
159        right,
160        right_offset_in_bits,
161        len_in_bits,
162        |a, b| a & b,
163    );
164    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
165    if result.offset() == 0 {
166        result.into_inner()
167    } else {
168        result.sliced()
169    }
170}
171
172/// Apply a bitwise or to two inputs and return the result as a Buffer.
173/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
174///
175/// # See Also
176/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly
177pub fn buffer_bin_or(
178    left: &Buffer,
179    left_offset_in_bits: usize,
180    right: &Buffer,
181    right_offset_in_bits: usize,
182    len_in_bits: usize,
183) -> Buffer {
184    let result = BooleanBuffer::from_bitwise_binary_op(
185        left,
186        left_offset_in_bits,
187        right,
188        right_offset_in_bits,
189        len_in_bits,
190        |a, b| a | b,
191    );
192    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
193    if result.offset() == 0 {
194        result.into_inner()
195    } else {
196        result.sliced()
197    }
198}
199
200/// Apply a bitwise xor to two inputs and return the result as a Buffer.
201/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
202///
203/// # See Also
204/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly
205pub fn buffer_bin_xor(
206    left: &Buffer,
207    left_offset_in_bits: usize,
208    right: &Buffer,
209    right_offset_in_bits: usize,
210    len_in_bits: usize,
211) -> Buffer {
212    let result = BooleanBuffer::from_bitwise_binary_op(
213        left,
214        left_offset_in_bits,
215        right,
216        right_offset_in_bits,
217        len_in_bits,
218        |a, b| a ^ b,
219    );
220    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
221    if result.offset() == 0 {
222        result.into_inner()
223    } else {
224        result.sliced()
225    }
226}
227
228/// Apply a bitwise and_not to two inputs and return the result as a Buffer.
229/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
230///
231/// # See Also
232/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly
233pub fn buffer_bin_and_not(
234    left: &Buffer,
235    left_offset_in_bits: usize,
236    right: &Buffer,
237    right_offset_in_bits: usize,
238    len_in_bits: usize,
239) -> Buffer {
240    let result = BooleanBuffer::from_bitwise_binary_op(
241        left,
242        left_offset_in_bits,
243        right,
244        right_offset_in_bits,
245        len_in_bits,
246        |a, b| a & !b,
247    );
248    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
249    if result.offset() == 0 {
250        result.into_inner()
251    } else {
252        result.sliced()
253    }
254}
255
256/// Apply a bitwise not to one input and return the result as a Buffer.
257/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
258///
259/// # See Also
260/// * [`BooleanBuffer::from_bitwise_unary_op`] for creating `BooleanBuffer`s directly
261pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
262    BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a).into_inner()
263}
264
265#[cfg(test)]
266mod tests {
267    use super::*;
268
269    #[test]
270    fn test_buffer_bin_ops_return_zero_offset_buffers() {
271        let left = Buffer::from(vec![0b1010_1100, 0b0110_1001]);
272        let right = Buffer::from(vec![0, 0, 0, 0, 0, 0, 0, 0, 0b1110_0101, 0b0101_1000]);
273
274        let left_offset = 1;
275        let right_offset = 65; // same mod 64 as left_offset, so from_bitwise_binary_op returns non-zero offset
276        let len = 7;
277
278        // Reuse the same offset scenario for all four binary wrappers:
279        // each wrapper should return the logically equivalent offset-0 Buffer,
280        // even though the underlying BooleanBuffer result has offset 1.
281        for (op, wrapper) in [
282            (
283                (|a, b| a & b) as fn(u64, u64) -> u64,
284                buffer_bin_and as fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer,
285            ),
286            (((|a, b| a | b) as fn(u64, u64) -> u64), buffer_bin_or),
287            (((|a, b| a ^ b) as fn(u64, u64) -> u64), buffer_bin_xor),
288            (((|a, b| a & !b) as fn(u64, u64) -> u64), buffer_bin_and_not),
289        ] {
290            let unsliced = BooleanBuffer::from_bitwise_binary_op(
291                &left,
292                left_offset,
293                &right,
294                right_offset,
295                len,
296                op,
297            );
298            assert_eq!(unsliced.offset(), 1);
299
300            let result = wrapper(&left, left_offset, &right, right_offset, len);
301
302            assert_eq!(result, unsliced.sliced());
303            assert_eq!(result.len(), 1);
304        }
305    }
306}