arrow_buffer/util/
bit_util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Utils for working with bits
19
20/// Returns the nearest number that is `>=` than `num` and is a multiple of 64
21#[inline]
22pub fn round_upto_multiple_of_64(num: usize) -> usize {
23    num.checked_next_multiple_of(64)
24        .expect("failed to round upto multiple of 64")
25}
26
27/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must
28/// be a power of 2.
29pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
30    debug_assert!(factor > 0 && factor.is_power_of_two());
31    num.checked_add(factor - 1)
32        .expect("failed to round to next highest power of 2")
33        & !(factor - 1)
34}
35
36/// Returns whether bit at position `i` in `data` is set or not
37#[inline]
38pub fn get_bit(data: &[u8], i: usize) -> bool {
39    data[i / 8] & (1 << (i % 8)) != 0
40}
41
42/// Returns whether bit at position `i` in `data` is set or not.
43///
44/// # Safety
45///
46/// Note this doesn't do any bound checking, for performance reason. The caller is
47/// responsible to guarantee that `i` is within bounds.
48#[inline]
49pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
50    unsafe { (*data.add(i / 8) & (1 << (i % 8))) != 0 }
51}
52
53/// Sets bit at position `i` for `data` to 1
54#[inline]
55pub fn set_bit(data: &mut [u8], i: usize) {
56    data[i / 8] |= 1 << (i % 8);
57}
58
59/// Sets bit at position `i` for `data`
60///
61/// # Safety
62///
63/// Note this doesn't do any bound checking, for performance reason. The caller is
64/// responsible to guarantee that `i` is within bounds.
65#[inline]
66pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
67    unsafe {
68        *data.add(i / 8) |= 1 << (i % 8);
69    }
70}
71
72/// Sets bit at position `i` for `data` to 0
73#[inline]
74pub fn unset_bit(data: &mut [u8], i: usize) {
75    data[i / 8] &= !(1 << (i % 8));
76}
77
78/// Sets bit at position `i` for `data` to 0
79///
80/// # Safety
81///
82/// Note this doesn't do any bound checking, for performance reason. The caller is
83/// responsible to guarantee that `i` is within bounds.
84#[inline]
85pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
86    unsafe {
87        *data.add(i / 8) &= !(1 << (i % 8));
88    }
89}
90
91/// Returns the ceil of `value`/`divisor`
92#[inline]
93pub fn ceil(value: usize, divisor: usize) -> usize {
94    value.div_ceil(divisor)
95}
96
97#[cfg(test)]
98mod tests {
99    use std::collections::HashSet;
100
101    use super::*;
102    use rand::rngs::StdRng;
103    use rand::{Rng, SeedableRng};
104
105    #[test]
106    fn test_round_upto_multiple_of_64() {
107        assert_eq!(0, round_upto_multiple_of_64(0));
108        assert_eq!(64, round_upto_multiple_of_64(1));
109        assert_eq!(64, round_upto_multiple_of_64(63));
110        assert_eq!(64, round_upto_multiple_of_64(64));
111        assert_eq!(128, round_upto_multiple_of_64(65));
112        assert_eq!(192, round_upto_multiple_of_64(129));
113    }
114
115    #[test]
116    #[should_panic(expected = "failed to round upto multiple of 64")]
117    fn test_round_upto_multiple_of_64_panic() {
118        let _ = round_upto_multiple_of_64(usize::MAX);
119    }
120
121    #[test]
122    #[should_panic(expected = "failed to round to next highest power of 2")]
123    fn test_round_upto_panic() {
124        let _ = round_upto_power_of_2(usize::MAX, 2);
125    }
126
127    #[test]
128    fn test_get_bit() {
129        // 00001101
130        assert!(get_bit(&[0b00001101], 0));
131        assert!(!get_bit(&[0b00001101], 1));
132        assert!(get_bit(&[0b00001101], 2));
133        assert!(get_bit(&[0b00001101], 3));
134
135        // 01001001 01010010
136        assert!(get_bit(&[0b01001001, 0b01010010], 0));
137        assert!(!get_bit(&[0b01001001, 0b01010010], 1));
138        assert!(!get_bit(&[0b01001001, 0b01010010], 2));
139        assert!(get_bit(&[0b01001001, 0b01010010], 3));
140        assert!(!get_bit(&[0b01001001, 0b01010010], 4));
141        assert!(!get_bit(&[0b01001001, 0b01010010], 5));
142        assert!(get_bit(&[0b01001001, 0b01010010], 6));
143        assert!(!get_bit(&[0b01001001, 0b01010010], 7));
144        assert!(!get_bit(&[0b01001001, 0b01010010], 8));
145        assert!(get_bit(&[0b01001001, 0b01010010], 9));
146        assert!(!get_bit(&[0b01001001, 0b01010010], 10));
147        assert!(!get_bit(&[0b01001001, 0b01010010], 11));
148        assert!(get_bit(&[0b01001001, 0b01010010], 12));
149        assert!(!get_bit(&[0b01001001, 0b01010010], 13));
150        assert!(get_bit(&[0b01001001, 0b01010010], 14));
151        assert!(!get_bit(&[0b01001001, 0b01010010], 15));
152    }
153
154    pub fn seedable_rng() -> StdRng {
155        StdRng::seed_from_u64(42)
156    }
157
158    #[test]
159    fn test_get_bit_raw() {
160        const NUM_BYTE: usize = 10;
161        let mut buf = [0; NUM_BYTE];
162        let mut expected = vec![];
163        let mut rng = seedable_rng();
164        for i in 0..8 * NUM_BYTE {
165            let b = rng.random_bool(0.5);
166            expected.push(b);
167            if b {
168                set_bit(&mut buf[..], i)
169            }
170        }
171
172        let raw_ptr = buf.as_ptr();
173        for (i, b) in expected.iter().enumerate() {
174            unsafe {
175                assert_eq!(*b, get_bit_raw(raw_ptr, i));
176            }
177        }
178    }
179
180    #[test]
181    fn test_set_bit() {
182        let mut b = [0b00000010];
183        set_bit(&mut b, 0);
184        assert_eq!([0b00000011], b);
185        set_bit(&mut b, 1);
186        assert_eq!([0b00000011], b);
187        set_bit(&mut b, 7);
188        assert_eq!([0b10000011], b);
189    }
190
191    #[test]
192    fn test_unset_bit() {
193        let mut b = [0b11111101];
194        unset_bit(&mut b, 0);
195        assert_eq!([0b11111100], b);
196        unset_bit(&mut b, 1);
197        assert_eq!([0b11111100], b);
198        unset_bit(&mut b, 7);
199        assert_eq!([0b01111100], b);
200    }
201
202    #[test]
203    fn test_set_bit_raw() {
204        const NUM_BYTE: usize = 10;
205        let mut buf = vec![0; NUM_BYTE];
206        let mut expected = vec![];
207        let mut rng = seedable_rng();
208        for i in 0..8 * NUM_BYTE {
209            let b = rng.random_bool(0.5);
210            expected.push(b);
211            if b {
212                unsafe {
213                    set_bit_raw(buf.as_mut_ptr(), i);
214                }
215            }
216        }
217
218        let raw_ptr = buf.as_ptr();
219        for (i, b) in expected.iter().enumerate() {
220            unsafe {
221                assert_eq!(*b, get_bit_raw(raw_ptr, i));
222            }
223        }
224    }
225
226    #[test]
227    fn test_unset_bit_raw() {
228        const NUM_BYTE: usize = 10;
229        let mut buf = vec![255; NUM_BYTE];
230        let mut expected = vec![];
231        let mut rng = seedable_rng();
232        for i in 0..8 * NUM_BYTE {
233            let b = rng.random_bool(0.5);
234            expected.push(b);
235            if !b {
236                unsafe {
237                    unset_bit_raw(buf.as_mut_ptr(), i);
238                }
239            }
240        }
241
242        let raw_ptr = buf.as_ptr();
243        for (i, b) in expected.iter().enumerate() {
244            unsafe {
245                assert_eq!(*b, get_bit_raw(raw_ptr, i));
246            }
247        }
248    }
249
250    #[test]
251    fn test_get_set_bit_roundtrip() {
252        const NUM_BYTES: usize = 10;
253        const NUM_SETS: usize = 10;
254
255        let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8];
256        let mut v = HashSet::new();
257        let mut rng = seedable_rng();
258        for _ in 0..NUM_SETS {
259            let offset = rng.random_range(0..8 * NUM_BYTES);
260            v.insert(offset);
261            set_bit(&mut buffer[..], offset);
262        }
263        for i in 0..NUM_BYTES * 8 {
264            assert_eq!(v.contains(&i), get_bit(&buffer[..], i));
265        }
266    }
267
268    #[test]
269    fn test_ceil() {
270        assert_eq!(ceil(0, 1), 0);
271        assert_eq!(ceil(1, 1), 1);
272        assert_eq!(ceil(1, 2), 1);
273        assert_eq!(ceil(1, 8), 1);
274        assert_eq!(ceil(7, 8), 1);
275        assert_eq!(ceil(8, 8), 1);
276        assert_eq!(ceil(9, 8), 2);
277        assert_eq!(ceil(9, 9), 1);
278        assert_eq!(ceil(10000000000, 10), 1000000000);
279        assert_eq!(ceil(10, 10000000000), 1);
280        assert_eq!(ceil(10000000000, 1000000000), 10);
281    }
282}