arrow_buffer/util/
bit_util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Utils for working with bits
19
20/// Returns the nearest number that is `>=` than `num` and is a multiple of 64
21#[inline]
22pub fn round_upto_multiple_of_64(num: usize) -> usize {
23    num.checked_next_multiple_of(64)
24        .expect("failed to round upto multiple of 64")
25}
26
27/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must
28/// be a power of 2.
29pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
30    debug_assert!(factor > 0 && factor.is_power_of_two());
31    num.checked_add(factor - 1)
32        .expect("failed to round to next highest power of 2")
33        & !(factor - 1)
34}
35
36/// Returns whether bit at position `i` in `data` is set or not
37#[inline]
38pub fn get_bit(data: &[u8], i: usize) -> bool {
39    data[i / 8] & (1 << (i % 8)) != 0
40}
41
42/// Returns whether bit at position `i` in `data` is set or not.
43///
44/// # Safety
45///
46/// Note this doesn't do any bound checking, for performance reason. The caller is
47/// responsible to guarantee that `i` is within bounds.
48#[inline]
49pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
50    (*data.add(i / 8) & (1 << (i % 8))) != 0
51}
52
53/// Sets bit at position `i` for `data` to 1
54#[inline]
55pub fn set_bit(data: &mut [u8], i: usize) {
56    data[i / 8] |= 1 << (i % 8);
57}
58
59/// Sets bit at position `i` for `data`
60///
61/// # Safety
62///
63/// Note this doesn't do any bound checking, for performance reason. The caller is
64/// responsible to guarantee that `i` is within bounds.
65#[inline]
66pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
67    *data.add(i / 8) |= 1 << (i % 8);
68}
69
70/// Sets bit at position `i` for `data` to 0
71#[inline]
72pub fn unset_bit(data: &mut [u8], i: usize) {
73    data[i / 8] &= !(1 << (i % 8));
74}
75
76/// Sets bit at position `i` for `data` to 0
77///
78/// # Safety
79///
80/// Note this doesn't do any bound checking, for performance reason. The caller is
81/// responsible to guarantee that `i` is within bounds.
82#[inline]
83pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
84    *data.add(i / 8) &= !(1 << (i % 8));
85}
86
87/// Returns the ceil of `value`/`divisor`
88#[inline]
89pub fn ceil(value: usize, divisor: usize) -> usize {
90    value.div_ceil(divisor)
91}
92
93#[cfg(test)]
94mod tests {
95    use std::collections::HashSet;
96
97    use super::*;
98    use rand::rngs::StdRng;
99    use rand::{Rng, SeedableRng};
100
101    #[test]
102    fn test_round_upto_multiple_of_64() {
103        assert_eq!(0, round_upto_multiple_of_64(0));
104        assert_eq!(64, round_upto_multiple_of_64(1));
105        assert_eq!(64, round_upto_multiple_of_64(63));
106        assert_eq!(64, round_upto_multiple_of_64(64));
107        assert_eq!(128, round_upto_multiple_of_64(65));
108        assert_eq!(192, round_upto_multiple_of_64(129));
109    }
110
111    #[test]
112    #[should_panic(expected = "failed to round upto multiple of 64")]
113    fn test_round_upto_multiple_of_64_panic() {
114        let _ = round_upto_multiple_of_64(usize::MAX);
115    }
116
117    #[test]
118    #[should_panic(expected = "failed to round to next highest power of 2")]
119    fn test_round_upto_panic() {
120        let _ = round_upto_power_of_2(usize::MAX, 2);
121    }
122
123    #[test]
124    fn test_get_bit() {
125        // 00001101
126        assert!(get_bit(&[0b00001101], 0));
127        assert!(!get_bit(&[0b00001101], 1));
128        assert!(get_bit(&[0b00001101], 2));
129        assert!(get_bit(&[0b00001101], 3));
130
131        // 01001001 01010010
132        assert!(get_bit(&[0b01001001, 0b01010010], 0));
133        assert!(!get_bit(&[0b01001001, 0b01010010], 1));
134        assert!(!get_bit(&[0b01001001, 0b01010010], 2));
135        assert!(get_bit(&[0b01001001, 0b01010010], 3));
136        assert!(!get_bit(&[0b01001001, 0b01010010], 4));
137        assert!(!get_bit(&[0b01001001, 0b01010010], 5));
138        assert!(get_bit(&[0b01001001, 0b01010010], 6));
139        assert!(!get_bit(&[0b01001001, 0b01010010], 7));
140        assert!(!get_bit(&[0b01001001, 0b01010010], 8));
141        assert!(get_bit(&[0b01001001, 0b01010010], 9));
142        assert!(!get_bit(&[0b01001001, 0b01010010], 10));
143        assert!(!get_bit(&[0b01001001, 0b01010010], 11));
144        assert!(get_bit(&[0b01001001, 0b01010010], 12));
145        assert!(!get_bit(&[0b01001001, 0b01010010], 13));
146        assert!(get_bit(&[0b01001001, 0b01010010], 14));
147        assert!(!get_bit(&[0b01001001, 0b01010010], 15));
148    }
149
150    pub fn seedable_rng() -> StdRng {
151        StdRng::seed_from_u64(42)
152    }
153
154    #[test]
155    fn test_get_bit_raw() {
156        const NUM_BYTE: usize = 10;
157        let mut buf = [0; NUM_BYTE];
158        let mut expected = vec![];
159        let mut rng = seedable_rng();
160        for i in 0..8 * NUM_BYTE {
161            let b = rng.random_bool(0.5);
162            expected.push(b);
163            if b {
164                set_bit(&mut buf[..], i)
165            }
166        }
167
168        let raw_ptr = buf.as_ptr();
169        for (i, b) in expected.iter().enumerate() {
170            unsafe {
171                assert_eq!(*b, get_bit_raw(raw_ptr, i));
172            }
173        }
174    }
175
176    #[test]
177    fn test_set_bit() {
178        let mut b = [0b00000010];
179        set_bit(&mut b, 0);
180        assert_eq!([0b00000011], b);
181        set_bit(&mut b, 1);
182        assert_eq!([0b00000011], b);
183        set_bit(&mut b, 7);
184        assert_eq!([0b10000011], b);
185    }
186
187    #[test]
188    fn test_unset_bit() {
189        let mut b = [0b11111101];
190        unset_bit(&mut b, 0);
191        assert_eq!([0b11111100], b);
192        unset_bit(&mut b, 1);
193        assert_eq!([0b11111100], b);
194        unset_bit(&mut b, 7);
195        assert_eq!([0b01111100], b);
196    }
197
198    #[test]
199    fn test_set_bit_raw() {
200        const NUM_BYTE: usize = 10;
201        let mut buf = vec![0; NUM_BYTE];
202        let mut expected = vec![];
203        let mut rng = seedable_rng();
204        for i in 0..8 * NUM_BYTE {
205            let b = rng.random_bool(0.5);
206            expected.push(b);
207            if b {
208                unsafe {
209                    set_bit_raw(buf.as_mut_ptr(), i);
210                }
211            }
212        }
213
214        let raw_ptr = buf.as_ptr();
215        for (i, b) in expected.iter().enumerate() {
216            unsafe {
217                assert_eq!(*b, get_bit_raw(raw_ptr, i));
218            }
219        }
220    }
221
222    #[test]
223    fn test_unset_bit_raw() {
224        const NUM_BYTE: usize = 10;
225        let mut buf = vec![255; NUM_BYTE];
226        let mut expected = vec![];
227        let mut rng = seedable_rng();
228        for i in 0..8 * NUM_BYTE {
229            let b = rng.random_bool(0.5);
230            expected.push(b);
231            if !b {
232                unsafe {
233                    unset_bit_raw(buf.as_mut_ptr(), i);
234                }
235            }
236        }
237
238        let raw_ptr = buf.as_ptr();
239        for (i, b) in expected.iter().enumerate() {
240            unsafe {
241                assert_eq!(*b, get_bit_raw(raw_ptr, i));
242            }
243        }
244    }
245
246    #[test]
247    fn test_get_set_bit_roundtrip() {
248        const NUM_BYTES: usize = 10;
249        const NUM_SETS: usize = 10;
250
251        let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8];
252        let mut v = HashSet::new();
253        let mut rng = seedable_rng();
254        for _ in 0..NUM_SETS {
255            let offset = rng.random_range(0..8 * NUM_BYTES);
256            v.insert(offset);
257            set_bit(&mut buffer[..], offset);
258        }
259        for i in 0..NUM_BYTES * 8 {
260            assert_eq!(v.contains(&i), get_bit(&buffer[..], i));
261        }
262    }
263
264    #[test]
265    fn test_ceil() {
266        assert_eq!(ceil(0, 1), 0);
267        assert_eq!(ceil(1, 1), 1);
268        assert_eq!(ceil(1, 2), 1);
269        assert_eq!(ceil(1, 8), 1);
270        assert_eq!(ceil(7, 8), 1);
271        assert_eq!(ceil(8, 8), 1);
272        assert_eq!(ceil(9, 8), 2);
273        assert_eq!(ceil(9, 9), 1);
274        assert_eq!(ceil(10000000000, 10), 1000000000);
275        assert_eq!(ceil(10, 10000000000), 1);
276        assert_eq!(ceil(10000000000, 1000000000), 10);
277    }
278}