Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
random.h
Go to the documentation of this file.
1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
5 // Moved from Kudu http://github.com/cloudera/kudu
6 
7 #ifndef ARROW_UTIL_RANDOM_H_
8 #define ARROW_UTIL_RANDOM_H_
9 
10 #include <stdint.h>
11 
12 #include <cmath>
13 
14 namespace arrow {
15 namespace internal {
16 namespace random {
17 
18 static const uint32_t M = 2147483647L; // 2^31-1
19 const double kTwoPi = 6.283185307179586476925286;
20 
21 } // namespace random
22 } // namespace internal
23 
24 // A very simple random number generator. Not especially good at
25 // generating truly random bits, but good enough for our needs in this
26 // package. This implementation is not thread-safe.
27 class Random {
28  public:
29  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
30  // Avoid bad seeds.
31  if (seed_ == 0 || seed_ == internal::random::M) {
32  seed_ = 1;
33  }
34  }
35 
36  // Next pseudo-random 32-bit unsigned integer.
37  // FIXME: This currently only generates 31 bits of randomness.
38  // The MSB will always be zero.
39  uint32_t Next() {
40  static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
41  // We are computing
42  // seed_ = (seed_ * A) % M, where M = 2^31-1
43  //
44  // seed_ must not be zero or M, or else all subsequent computed values
45  // will be zero or M respectively. For all other values, seed_ will end
46  // up cycling through every number in [1,M-1]
47  uint64_t product = seed_ * A;
48 
49  // Compute (product % M) using the fact that ((x << 31) % M) == x.
50  seed_ = static_cast<uint32_t>((product >> 31) + (product & internal::random::M));
51  // The first reduction may overflow by 1 bit, so we may need to
52  // repeat. mod == M is not possible; using > allows the faster
53  // sign-bit-based test.
54  if (seed_ > internal::random::M) {
55  seed_ -= internal::random::M;
56  }
57  return seed_;
58  }
59 
60  // Alias for consistency with Next64
61  uint32_t Next32() { return Next(); }
62 
63  // Next pseudo-random 64-bit unsigned integer.
64  // FIXME: This currently only generates 62 bits of randomness due to Next()
65  // only giving 31 bits of randomness. The 2 most significant bits will always
66  // be zero.
67  uint64_t Next64() {
68  uint64_t large = Next();
69  // Only shift by 31 bits so we end up with zeros in MSB and not scattered
70  // throughout the 64-bit word. This is due to the weakness in Next() noted
71  // above.
72  large <<= 31;
73  large |= Next();
74  return large;
75  }
76 
77  // Returns a uniformly distributed value in the range [0..n-1]
78  // REQUIRES: n > 0
79  uint32_t Uniform(uint32_t n) { return Next() % n; }
80 
81  // Alias for consistency with Uniform64
82  uint32_t Uniform32(uint32_t n) { return Uniform(n); }
83 
84  // Returns a uniformly distributed 64-bit value in the range [0..n-1]
85  // REQUIRES: n > 0
86  uint64_t Uniform64(uint64_t n) { return Next64() % n; }
87 
88  // Randomly returns true ~"1/n" of the time, and false otherwise.
89  // REQUIRES: n > 0
90  bool OneIn(int n) { return (Next() % n) == 0; }
91 
92  // Skewed: pick "base" uniformly from range [0,max_log] and then
93  // return "base" random bits. The effect is to pick a number in the
94  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
95  uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); }
96 
97  // Creates a normal distribution variable using the
98  // Box-Muller transform. See:
99  // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
100  // Adapted from WebRTC source code at:
101  // webrtc/trunk/modules/video_coding/main/test/test_util.cc
102  double Normal(double mean, double std_dev) {
103  double uniform1 = (Next() + 1.0) / (internal::random::M + 1.0);
104  double uniform2 = (Next() + 1.0) / (internal::random::M + 1.0);
105  return (mean +
106  std_dev * sqrt(-2 * ::log(uniform1)) *
107  cos(internal::random::kTwoPi * uniform2));
108  }
109 
110  // Return a random number between 0.0 and 1.0 inclusive.
112  return Next() / static_cast<double>(internal::random::M + 1.0);
113  }
114 
115  private:
116  uint32_t seed_;
117 };
118 
119 uint32_t random_seed() {
120  // TODO(wesm): use system time to get a reasonably random seed
121  return 0;
122 }
123 
124 } // namespace arrow
125 
126 #endif // ARROW_UTIL_RANDOM_H_
uint32_t random_seed()
Definition: random.h:119
uint32_t Uniform(uint32_t n)
Definition: random.h:79
uint64_t Uniform64(uint64_t n)
Definition: random.h:86
Random(uint32_t s)
Definition: random.h:29
double NextDoubleFraction()
Definition: random.h:111
uint64_t Next64()
Definition: random.h:67
uint32_t Next()
Definition: random.h:39
double Normal(double mean, double std_dev)
Definition: random.h:102
uint32_t Skewed(int max_log)
Definition: random.h:95
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
Definition: random.h:27
uint32_t Uniform32(uint32_t n)
Definition: random.h:82
bool OneIn(int n)
Definition: random.h:90
uint32_t Next32()
Definition: random.h:61