Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data processing.
bit-util.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_UTIL_BIT_UTIL_H
19 #define ARROW_UTIL_BIT_UTIL_H
20 
21 #ifdef _WIN32
22 #define ARROW_LITTLE_ENDIAN 1
23 #else
24 #ifdef __APPLE__
25 #include <machine/endian.h>
26 #else
27 #include <endian.h>
28 #endif
29 #
30 #ifndef __BYTE_ORDER__
31 #error "__BYTE_ORDER__ not defined"
32 #endif
33 #
34 #ifndef __ORDER_LITTLE_ENDIAN__
35 #error "__ORDER_LITTLE_ENDIAN__ not defined"
36 #endif
37 #
38 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
39 #define ARROW_LITTLE_ENDIAN 1
40 #else
41 #define ARROW_LITTLE_ENDIAN 0
42 #endif
43 #endif
44 
45 #if defined(_MSC_VER)
46 #include <intrin.h>
47 #pragma intrinsic(_BitScanReverse)
48 #define ARROW_BYTE_SWAP64 _byteswap_uint64
49 #define ARROW_BYTE_SWAP32 _byteswap_ulong
50 #else
51 #define ARROW_BYTE_SWAP64 __builtin_bswap64
52 #define ARROW_BYTE_SWAP32 __builtin_bswap32
53 #endif
54 
55 #include <cstdint>
56 #include <limits>
57 #include <memory>
58 #include <type_traits>
59 #include <vector>
60 
61 #include "arrow/util/macros.h"
62 #include "arrow/util/type_traits.h"
63 #include "arrow/util/visibility.h"
64 
65 #ifdef ARROW_USE_SSE
66 #include "arrow/util/cpu-info.h"
67 #include "arrow/util/sse-util.h"
68 #endif
69 
70 namespace arrow {
71 
72 class Buffer;
73 class MemoryPool;
74 class Status;
75 
76 namespace detail {
77 
// Reinterpret an integer value as its unsigned counterpart of the same width.
template <typename Integer>
typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
  using Unsigned = typename std::make_unsigned<Integer>::type;
  return static_cast<Unsigned>(x);
}
82 
83 } // namespace detail
84 
85 namespace BitUtil {
86 
87 //
88 // Bit-related computations on integer values
89 //
90 
// Returns the ceiling of value / divisor.
// NOTE(review): intended for value >= 0 and divisor > 0 — with negative
// operands C++ truncating division makes this round the wrong way; confirm
// callers never pass negatives.
static inline int64_t CeilDiv(int64_t value, int64_t divisor) {
  const int64_t quotient = value / divisor;
  const int64_t remainder = value % divisor;
  return remainder != 0 ? quotient + 1 : quotient;
}
95 
// Number of whole bytes needed to hold `bits` bits, i.e. ceil(bits / 8).
static inline int64_t BytesForBits(int64_t bits) {
  const int64_t rounded = bits + 7;
  return rounded >> 3;
}
97 
// Returns the smallest power of two that contains v. If v is already a
// power of two, it is returned as is.
// The bit-smearing is performed on an unsigned copy: the original did the
// shifts and the final increment on int64_t, where right-shifting a negative
// value (reached via n-- when n == 0) is implementation-defined and the
// increment can signed-overflow (UB). Results for valid inputs
// (0 < n <= 2^62) are unchanged.
static inline int64_t NextPower2(int64_t n) {
  // Taken from
  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  uint64_t u = static_cast<uint64_t>(n);
  u--;
  u |= u >> 1;
  u |= u >> 2;
  u |= u >> 4;
  u |= u >> 8;
  u |= u >> 16;
  u |= u >> 32;
  u++;
  return static_cast<int64_t>(u);
}
113 
// True iff n is evenly divisible by 64.
static inline bool IsMultipleOf64(int64_t n) { return n % 64 == 0; }
115 
// True iff n is evenly divisible by 8.
static inline bool IsMultipleOf8(int64_t n) { return n % 8 == 0; }
117 
// Returns 'value' rounded up to the nearest multiple of 'factor'.
static inline int64_t RoundUp(int64_t value, int64_t factor) {
  const int64_t bumped = value + factor - 1;
  return bumped / factor * factor;
}
122 
// Returns 'value' rounded up to the nearest multiple of 'factor', where
// 'factor' must be a power of two.
// The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
// since we cannot return the correct result which would be 2**64.
static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
  // DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
  const int64_t mask = factor - 1;
  return (value + mask) & ~mask;
}
131 
// Returns num rounded up to the nearest multiple of 8.
static inline int64_t RoundUpToMultipleOf8(int64_t num) {
  return (num + 7) & ~static_cast<int64_t>(7);
}
135 
// Returns num rounded up to the nearest multiple of 64.
static inline int64_t RoundUpToMultipleOf64(int64_t num) {
  return (num + 63) & ~static_cast<int64_t>(63);
}
139 
140 // Returns the 'num_bits' least-significant bits of 'v'.
141 static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
142  if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
143  if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
144  int n = 64 - num_bits;
145  return (v << n) >> n;
146 }
147 
// Number of leading zero bits in a 32-bit value; returns 32 for value == 0.
static inline int CountLeadingZeros(uint32_t value) {
#if defined(__clang__) || defined(__GNUC__)
  // __builtin_clz is undefined for 0, so special-case it first.
  if (value == 0) return 32;
  return static_cast<int>(__builtin_clz(value));
#elif defined(_MSC_VER)
  unsigned long index;                                               // NOLINT
  if (_BitScanReverse(&index, static_cast<unsigned long>(value))) {  // NOLINT
    return 31 - static_cast<int>(index);
  } else {
    return 32;
  }
#else
  // Portable fallback: count how many shifts it takes to reach zero.
  int width = 0;
  while (value != 0) {
    value >>= 1;
    ++width;
  }
  return 32 - width;
#endif
}
169 
// Number of leading zero bits in a 64-bit value; returns 64 for value == 0.
static inline int CountLeadingZeros(uint64_t value) {
#if defined(__clang__) || defined(__GNUC__)
  // __builtin_clz* is undefined for 0, so special-case it first.
  if (value == 0) return 64;
  // Use the 'll' variant: __builtin_clzl counts on 'unsigned long', which is
  // only 32 bits on LLP64 (e.g. Windows/MinGW) and 32-bit platforms, giving
  // wrong results for 64-bit inputs.  'unsigned long long' is >= 64 bits.
  return static_cast<int>(__builtin_clzll(value));
#elif defined(_MSC_VER)
  unsigned long index;                     // NOLINT
  if (_BitScanReverse64(&index, value)) {  // NOLINT
    return 63 - static_cast<int>(index);
  } else {
    return 64;
  }
#else
  // Portable fallback: count how many shifts it takes to reach zero.
  int bitpos = 0;
  while (value != 0) {
    value >>= 1;
    ++bitpos;
  }
  return 64 - bitpos;
#endif
}
190 
191 // Returns the minimum number of bits needed to represent an unsigned value
192 static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
193 
// Returns ceil(log2(x)).  x must be > 0; note Log2(1) == 0.
static inline int Log2(uint64_t x) {
  // DCHECK_GT(x, 0);
  // The bit width of (x - 1) equals ceil(log2(x)) for x > 0.
  return NumRequiredBits(x - 1);
}
199 
200 //
201 // Byte-swap 16-bit, 32-bit and 64-bit values
202 //
203 
204 // Swap the byte order (i.e. endianess)
205 static inline int64_t ByteSwap(int64_t value) { return ARROW_BYTE_SWAP64(value); }
206 static inline uint64_t ByteSwap(uint64_t value) {
207  return static_cast<uint64_t>(ARROW_BYTE_SWAP64(value));
208 }
209 static inline int32_t ByteSwap(int32_t value) { return ARROW_BYTE_SWAP32(value); }
210 static inline uint32_t ByteSwap(uint32_t value) {
211  return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
212 }
// 16-bit swaps are composed by hand since no ARROW_BYTE_SWAP16 macro exists.
static inline int16_t ByteSwap(int16_t value) {
  constexpr auto kLowByte = static_cast<int16_t>(0xff);
  const auto hi_to_lo = (value >> 8) & kLowByte;
  const auto lo_to_hi = (value & kLowByte) << 8;
  return static_cast<int16_t>(hi_to_lo | lo_to_hi);
}
static inline uint16_t ByteSwap(uint16_t value) {
  return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
}
220 
// Write the swapped bytes into dst. Src and dst cannot overlap.
//
// Lengths 1, 2, 4 and 8 are dispatched to the fixed-width integer overloads
// above; any other length falls back to a generic byte-reversal loop.
static inline void ByteSwap(void* dst, const void* src, int len) {
  switch (len) {
    case 1:
      // A single byte is its own swap: plain copy.
      *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
      return;
    case 2:
      *reinterpret_cast<int16_t*>(dst) = ByteSwap(*reinterpret_cast<const int16_t*>(src));
      return;
    case 4:
      *reinterpret_cast<int32_t*>(dst) = ByteSwap(*reinterpret_cast<const int32_t*>(src));
      return;
    case 8:
      *reinterpret_cast<int64_t*>(dst) = ByteSwap(*reinterpret_cast<const int64_t*>(src));
      return;
    default:
      break;
  }

  // Generic path: reverse the bytes one at a time.
  auto d = reinterpret_cast<uint8_t*>(dst);
  auto s = reinterpret_cast<const uint8_t*>(src);
  for (int i = 0; i < len; ++i) {
    d[i] = s[len - i - 1];
  }
}
246 
// Convert to little/big endian format from the machine's native endian format.
// On a little-endian machine ToLittleEndian is the identity and ToBigEndian
// byte-swaps; on a big-endian machine it is the reverse.  Only the listed
// fixed-width integer types are accepted (enforced via EnableIfIsOneOf).
#if ARROW_LITTLE_ENDIAN
template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T ToBigEndian(T value) {
  return ByteSwap(value);
}

template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T ToLittleEndian(T value) {
  // Already little-endian: nothing to do.
  return value;
}
#else
template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T ToBigEndian(T value) {
  // Already big-endian: nothing to do.
  return value;
}

template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T ToLittleEndian(T value) {
  return ByteSwap(value);
}
#endif

// Convert from big/little endian format to the machine's native endian format.
// These mirror the To*Endian functions above (a byte swap is its own inverse).
#if ARROW_LITTLE_ENDIAN
template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T FromBigEndian(T value) {
  return ByteSwap(value);
}

template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T FromLittleEndian(T value) {
  // Native format already matches: nothing to do.
  return value;
}
#else
template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T FromBigEndian(T value) {
  // Native format already matches: nothing to do.
  return value;
}

template <typename T, typename = internal::EnableIfIsOneOf<T, int64_t, uint64_t, int32_t,
                                                           uint32_t, int16_t, uint16_t>>
static inline T FromLittleEndian(T value) {
  return ByteSwap(value);
}
#endif
300 
301 //
302 // Utilities for reading and writing individual bits by their index
303 // in a memory area.
304 //
305 
// Lookup tables for manipulating a single bit within a byte.

// kBitmask[k] has only bit k set.
static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};

// kFlippedBitmask[k] is ~kBitmask[k]: every bit except bit k set.
static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};

// kPrecedingBitmask[k] selects the k least-significant bits (those preceding
// bit position k).
static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};

// kTrailingBitmask[k] is ~kPrecedingBitmask[k]: bit k and everything above it.
static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};

// Reads bit i, counting from the least-significant bit of bits[0].
static inline bool GetBit(const uint8_t* bits, int64_t i) {
  const uint8_t byte = bits[i / 8];
  return (byte & kBitmask[i % 8]) != 0;
}

// Clears bit i in place.
static inline void ClearBit(uint8_t* bits, int64_t i) {
  bits[i / 8] &= kFlippedBitmask[i % 8];
}

// Sets bit i in place.
static inline void SetBit(uint8_t* bits, int64_t i) {
  bits[i / 8] |= kBitmask[i % 8];
}

// Sets bit i to bit_is_set without branching.
static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
  // "Conditionally set or clear bits without branching" from
  // https://graphics.stanford.edu/~seander/bithacks.html
  // NOTE: reads the target byte even for a pure write, which can look like a
  // read of uninitialized memory to Valgrind.
  const uint8_t mask = kBitmask[i % 8];
  const uint8_t spread = static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set));
  bits[i / 8] ^= static_cast<uint8_t>(spread ^ bits[i / 8]) & mask;
}
336 
338 ARROW_EXPORT
339 Status BytesToBits(const std::vector<uint8_t>&, MemoryPool*, std::shared_ptr<Buffer>*);
340 
341 } // namespace BitUtil
342 
343 namespace internal {
344 
// Sequentially reads bits from a bitmap starting at an arbitrary bit offset.
// Inspect the current bit with IsSet()/IsNotSet() and advance with Next();
// reading more than `length` bits is a caller error (no bounds checks).
class BitmapReader {
 public:
  // `bitmap` must remain valid for the reader's lifetime.  `start_offset`
  // is in bits from the start of `bitmap`; `length` is the number of
  // readable bits.
  BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
      : bitmap_(bitmap), position_(0), length_(length) {
    current_byte_ = 0;
    byte_offset_ = start_offset / 8;
    bit_offset_ = start_offset % 8;
    if (length > 0) {
      // Only dereference when there is at least one readable bit.
      current_byte_ = bitmap[byte_offset_];
    }
  }

  bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }

  bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }

  // Advance to the next bit, loading the next byte when crossing a byte
  // boundary — but never reading past the end of the readable range.
  void Next() {
    ++bit_offset_;
    ++position_;
    if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
      bit_offset_ = 0;
      ++byte_offset_;
      if (ARROW_PREDICT_TRUE(position_ < length_)) {
        current_byte_ = bitmap_[byte_offset_];
      }
    }
  }

 private:
  const uint8_t* bitmap_;
  int64_t position_;  // number of bits consumed so far
  int64_t length_;    // total readable bits

  uint8_t current_byte_;  // byte containing the current bit
  int64_t byte_offset_;   // index of current_byte_ within bitmap_
  int64_t bit_offset_;    // bit index of the current bit within current_byte_
};
382 
383 class BitmapWriter {
384  // A sequential bitwise writer that preserves surrounding bit values.
385 
386  public:
387  BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
388  : bitmap_(bitmap), position_(0), length_(length) {
389  byte_offset_ = start_offset / 8;
390  bit_mask_ = BitUtil::kBitmask[start_offset % 8];
391  if (length > 0) {
392  current_byte_ = bitmap[byte_offset_];
393  } else {
394  current_byte_ = 0;
395  }
396  }
397 
398  void Set() { current_byte_ |= bit_mask_; }
399 
400  void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
401 
402  void Next() {
403  bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
404  ++position_;
405  if (bit_mask_ == 0) {
406  // Finished this byte, need advancing
407  bit_mask_ = 0x01;
408  bitmap_[byte_offset_++] = current_byte_;
409  if (ARROW_PREDICT_TRUE(position_ < length_)) {
410  current_byte_ = bitmap_[byte_offset_];
411  }
412  }
413  }
414 
415  void Finish() {
416  // Store current byte if we didn't went past bitmap storage
417  if (bit_mask_ != 0x01 || position_ < length_) {
418  bitmap_[byte_offset_] = current_byte_;
419  }
420  }
421 
422  int64_t position() const { return position_; }
423 
424  private:
425  uint8_t* bitmap_;
426  int64_t position_;
427  int64_t length_;
428 
429  uint8_t current_byte_;
430  uint8_t bit_mask_;
431  int64_t byte_offset_;
432 };
433 
434 class FirstTimeBitmapWriter {
435  // Like BitmapWriter, but any bit values *following* the bits written
436  // might be clobbered. It is hence faster than BitmapWriter, and can
437  // also avoid false positives with Valgrind.
438 
439  public:
440  FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
441  : bitmap_(bitmap), position_(0), length_(length) {
442  current_byte_ = 0;
443  byte_offset_ = start_offset / 8;
444  bit_mask_ = BitUtil::kBitmask[start_offset % 8];
445  if (length > 0) {
446  current_byte_ = bitmap[byte_offset_] & BitUtil::kPrecedingBitmask[start_offset % 8];
447  } else {
448  current_byte_ = 0;
449  }
450  }
451 
452  void Set() { current_byte_ |= bit_mask_; }
453 
454  void Clear() {}
455 
456  void Next() {
457  bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
458  ++position_;
459  if (bit_mask_ == 0) {
460  // Finished this byte, need advancing
461  bit_mask_ = 0x01;
462  bitmap_[byte_offset_++] = current_byte_;
463  current_byte_ = 0;
464  }
465  }
466 
467  void Finish() {
468  // Store current byte if we didn't went past bitmap storage
469  if (bit_mask_ != 0x01 || position_ < length_) {
470  bitmap_[byte_offset_] = current_byte_;
471  }
472  }
473 
474  int64_t position() const { return position_; }
475 
476  private:
477  uint8_t* bitmap_;
478  int64_t position_;
479  int64_t length_;
480 
481  uint8_t current_byte_;
482  uint8_t bit_mask_;
483  int64_t byte_offset_;
484 };
485 
486 // A std::generate() like function to write sequential bits into a bitmap area.
487 // Bits preceding the bitmap area are preserved, bits following the bitmap
488 // area may be clobbered.
489 
490 template <class Generator>
491 void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
492  if (length == 0) {
493  return;
494  }
495  uint8_t* cur = bitmap + start_offset / 8;
496  uint8_t bit_mask = BitUtil::kBitmask[start_offset % 8];
497  uint8_t current_byte = *cur & BitUtil::kPrecedingBitmask[start_offset % 8];
498 
499  for (int64_t index = 0; index < length; ++index) {
500  const bool bit = g();
501  current_byte = bit ? (current_byte | bit_mask) : current_byte;
502  bit_mask = static_cast<uint8_t>(bit_mask << 1);
503  if (bit_mask == 0) {
504  bit_mask = 1;
505  *cur++ = current_byte;
506  current_byte = 0;
507  }
508  }
509  if (bit_mask != 1) {
510  *cur++ = current_byte;
511  }
512 }
513 
// Like GenerateBits(), but unrolls its main loop for higher performance.
// g() is called exactly `length` times, in order, one call per bit.

template <class Generator>
void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
                          Generator&& g) {
  if (length == 0) {
    return;
  }
  uint8_t current_byte;
  uint8_t* cur = bitmap + start_offset / 8;
  const uint64_t start_bit_offset = start_offset % 8;
  uint8_t bit_mask = BitUtil::kBitmask[start_bit_offset];
  int64_t remaining = length;

  if (bit_mask != 0x01) {
    // Unaligned start: fill the first byte bit by bit, preserving the bits
    // that precede start_offset.
    current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
    while (bit_mask != 0 && remaining > 0) {
      current_byte = g() ? (current_byte | bit_mask) : current_byte;
      bit_mask = static_cast<uint8_t>(bit_mask << 1);
      --remaining;
    }
    *cur++ = current_byte;
  }

  // Aligned middle section: 8 generator calls unrolled per output byte.
  int64_t remaining_bytes = remaining / 8;
  while (remaining_bytes-- > 0) {
    current_byte = 0;
    current_byte = g() ? current_byte | 0x01 : current_byte;
    current_byte = g() ? current_byte | 0x02 : current_byte;
    current_byte = g() ? current_byte | 0x04 : current_byte;
    current_byte = g() ? current_byte | 0x08 : current_byte;
    current_byte = g() ? current_byte | 0x10 : current_byte;
    current_byte = g() ? current_byte | 0x20 : current_byte;
    current_byte = g() ? current_byte | 0x40 : current_byte;
    current_byte = g() ? current_byte | 0x80 : current_byte;
    *cur++ = current_byte;
  }

  // Trailing partial byte: bits past `length` within it are written as
  // zero, which the "may clobber following bits" contract allows.
  int64_t remaining_bits = remaining % 8;
  if (remaining_bits) {
    current_byte = 0;
    bit_mask = 0x01;
    while (remaining_bits-- > 0) {
      current_byte = g() ? (current_byte | bit_mask) : current_byte;
      bit_mask = static_cast<uint8_t>(bit_mask << 1);
    }
    *cur++ = current_byte;
  }
}
563 
564 // ----------------------------------------------------------------------
565 // Bitmap utilities
566 
576 ARROW_EXPORT
577 Status CopyBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, int64_t length,
578  std::shared_ptr<Buffer>* out);
579 
588 ARROW_EXPORT
589 void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
590  int64_t dest_offset);
591 
600 ARROW_EXPORT
601 void InvertBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
602  int64_t dest_offset);
603 
613 ARROW_EXPORT
614 Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset,
615  int64_t length, std::shared_ptr<Buffer>* out);
616 
624 ARROW_EXPORT
625 int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length);
626 
627 ARROW_EXPORT
628 bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
629  int64_t right_offset, int64_t bit_length);
630 
631 ARROW_EXPORT
632 Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
633  const uint8_t* right, int64_t right_offset, int64_t length,
634  int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
635 
636 ARROW_EXPORT
637 Status BitmapOr(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
638  const uint8_t* right, int64_t right_offset, int64_t length,
639  int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
640 
641 ARROW_EXPORT
642 Status BitmapXor(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
643  const uint8_t* right, int64_t right_offset, int64_t length,
644  int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
645 
646 } // namespace internal
647 } // namespace arrow
648 
649 #endif // ARROW_UTIL_BIT_UTIL_H
#define ARROW_PREDICT_TRUE(x)
Definition: macros.h:49
#define ARROW_BYTE_SWAP32
Definition: bit-util.h:52
#define ARROW_PREDICT_FALSE(x)
Definition: macros.h:48
Definition: bit-stream-utils.h:33
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
#define ARROW_BYTE_SWAP64
Definition: bit-util.h:51
::arrow::Buffer Buffer
Definition: memory.h:54