SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Random.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Viktor Gal
8  * Copyright (C) 2013 Viktor Gal
9  */
10 
12 #include <shogun/base/Parameter.h>
13 #include <shogun/lib/external/SFMT/SFMT.h>
14 #include <shogun/lib/external/dSFMT/dSFMT.h>
15 #include <shogun/lib/Time.h>
16 #include <shogun/lib/Lock.h>
17 
18 using namespace shogun;
19 
21  : m_seed((uint32_t)CTime::get_curtime()*100),
22  m_sfmt_32(NULL),
23  m_sfmt_64(NULL),
24  m_dsfmt(NULL)
25 {
26  init();
27 }
28 
29 CRandom::CRandom(uint32_t seed)
30  : m_seed(seed),
31  m_sfmt_32(NULL),
32  m_sfmt_64(NULL),
33  m_dsfmt(NULL)
34 {
35  init();
36 }
37 
39 {
40  SG_FREE(m_x);
41  SG_FREE(m_y);
42  SG_FREE(m_xComp);
43  SG_FREE(m_sfmt_32);
44  SG_FREE(m_sfmt_64);
45  SG_FREE(m_dsfmt);
46 }
47 
48 void CRandom::set_seed(uint32_t seed)
49 {
50  reinit(seed);
51 }
52 
53 uint32_t CRandom::get_seed() const
54 {
55  return m_seed;
56 }
57 
58 void CRandom::init()
59 {
61  m_blockCount = 128;
62  m_R = 3.442619855899;
63  m_A = 9.91256303526217e-3;
64  m_uint32ToU = 1.0 / (float64_t)std::numeric_limits<uint32_t>::max();
65 
66  m_x = SG_MALLOC(float64_t, m_blockCount + 1);
67  m_y = SG_MALLOC(float64_t, m_blockCount);
68  m_xComp = SG_MALLOC(uint32_t, m_blockCount);
69 
70  // Initialise rectangle position data.
71  // m_x[i] and m_y[i] describe the top-right position ox Box i.
72 
73  // Determine top right position of the base rectangle/box (the rectangle with the Gaussian tale attached).
74  // We call this Box 0 or B0 for short.
75  // Note. x[0] also describes the right-hand edge of B1. (See diagram).
76  m_x[0] = m_R;
77  m_y[0] = GaussianPdfDenorm(m_R);
78 
79  // The next box (B1) has a right hand X edge the same as B0.
80  // Note. B1's height is the box area divided by its width, hence B1 has a smaller height than B0 because
81  // B0's total area includes the attached distribution tail.
82  m_x[1] = m_R;
83  m_y[1] = m_y[0] + (m_A / m_x[1]);
84 
85  // Calc positions of all remaining rectangles.
86  for(int i=2; i < m_blockCount; i++)
87  {
88  m_x[i] = GaussianPdfDenormInv(m_y[i-1]);
89  m_y[i] = m_y[i-1] + (m_A / m_x[i]);
90  }
91 
92  // For completeness we define the right-hand edge of a notional box 6 as being zero (a box with no area).
93  m_x[m_blockCount] = 0.0;
94 
95  // Useful precomputed values.
96  m_A_div_y0 = m_A / m_y[0];
97 
98  // Special case for base box. m_xComp[0] stores the area of B0 as a proportion of R
99  // (recalling that all segments have area A, but that the base segment is the combination of B0 and the distribution tail).
100  // Thus -m_xComp[0] is the probability that a sample point is within the box part of the segment.
101  m_xComp[0] = (uint32_t)(((m_R * m_y[0]) / m_A) * (float64_t)std::numeric_limits<uint32_t>::max());
102 
103  for(int32_t i=1; i < m_blockCount-1; i++)
104  {
105  m_xComp[i] = (uint32_t)((m_x[i+1] / m_x[i]) * (float64_t)std::numeric_limits<uint32_t>::max());
106  }
107  m_xComp[m_blockCount-1] = 0; // Shown for completeness.
108 
109  // Sanity check. Test that the top edge of the topmost rectangle is at y=1.0.
110  // Note. We expect there to be a tiny drift away from 1.0 due to the inexactness of floating
111  // point arithmetic.
112  ASSERT(CMath::abs(1.0 - m_y[m_blockCount-1]) < 1e-10);
113 
115  m_sfmt_32 = SG_MALLOC(sfmt_t, 1);
116  m_sfmt_64 = SG_MALLOC(sfmt_t, 1);
117  m_dsfmt = SG_MALLOC(dsfmt_t, 1);
118  reinit(m_seed);
119 }
120 
121 uint32_t CRandom::random_32() const
122 {
123  return sfmt_genrand_uint32(m_sfmt_32);
124 }
125 
126 uint64_t CRandom::random_64() const
127 {
128  return sfmt_genrand_uint64(m_sfmt_64);
129 }
130 
131 void CRandom::fill_array(uint32_t* array, int32_t size) const
132 {
133 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
134  if ((size >= sfmt_get_min_array_size32(m_sfmt_32)) && (size % 4) == 0)
135  {
136  sfmt_fill_array32(m_sfmt_32, array, size);
137  return;
138  }
139 #endif
140  for (int32_t i=0; i < size; i++)
141  array[i] = random_32();
142 }
143 
144 void CRandom::fill_array(uint64_t* array, int32_t size) const
145 {
146 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
147  if ((size >= sfmt_get_min_array_size64(m_sfmt_64)) && (size % 2) == 0)
148  {
149  sfmt_fill_array64(m_sfmt_64, array, size);
150  return;
151  }
152 #endif
153  for (int32_t i=0; i < size; i++)
154  array[i] = random_64();
155 }
156 
157 void CRandom::fill_array_oc(float64_t* array, int32_t size) const
158 {
159 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
160  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
161  {
162  dsfmt_fill_array_open_close(m_dsfmt, array, size);
163  return;
164  }
165 #endif
166  for (int32_t i=0; i < size; i++)
167  array[i] = dsfmt_genrand_open_close(m_dsfmt);
168 }
169 
170 void CRandom::fill_array_co(float64_t* array, int32_t size) const
171 {
172 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
173  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
174  {
175  dsfmt_fill_array_close_open(m_dsfmt, array, size);
176  return;
177  }
178 #endif
179  for (int32_t i=0; i < size; i++)
180  array[i] = dsfmt_genrand_close_open(m_dsfmt);
181 }
182 
183 void CRandom::fill_array_oo(float64_t* array, int32_t size) const
184 {
185 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
186  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
187  {
188  dsfmt_fill_array_open_open(m_dsfmt, array, size);
189  return;
190  }
191 #endif
192  for (int32_t i=0; i < size; i++)
193  array[i] = dsfmt_genrand_open_open(m_dsfmt);
194 }
195 
196 void CRandom::fill_array_c1o2(float64_t* array, int32_t size) const
197 {
198 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
199  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
200  {
201  dsfmt_fill_array_close1_open2(m_dsfmt, array, size);
202  return;
203  }
204 #endif
205  for (int32_t i=0; i < size; i++)
206  array[i] = dsfmt_genrand_close1_open2(m_dsfmt);
207 }
208 
210 {
211  return sfmt_genrand_real1(m_sfmt_32);
212 }
213 
215 {
216  return dsfmt_genrand_open_open(m_dsfmt);
217 }
218 
220 {
221  return dsfmt_genrand_close_open(m_dsfmt);
222 }
223 
225 {
226  return mu + (std_normal_distrib() * sigma);
227 }
228 
230 {
231  for (;;)
232  {
233  // Select box at random.
234  uint8_t u = random_32();
235  int32_t i = (int32_t)(u & 0x7F);
236  float64_t sign = ((u & 0x80) == 0) ? -1.0 : 1.0;
237 
238  // Generate uniform random value with range [0,0xffffffff].
239  uint32_t u2 = random_32();
240 
241  // Special case for the base segment.
242  if(0 == i)
243  {
244  if(u2 < m_xComp[0])
245  {
246  // Generated x is within R0.
247  return u2 * m_uint32ToU * m_A_div_y0 * sign;
248  }
249  // Generated x is in the tail of the distribution.
250  return sample_tail() * sign;
251  }
252 
253  // All other segments.
254  if(u2 < m_xComp[i])
255  { // Generated x is within the rectangle.
256  return u2 * m_uint32ToU * m_x[i] * sign;
257  }
258 
259  // Generated x is outside of the rectangle.
260  // Generate a random y coordinate and test if our (x,y) is within the distribution curve.
261  // This execution path is relatively slow/expensive (makes a call to Math.Exp()) but relatively rarely executed,
262  // although more often than the 'tail' path (above).
263  float64_t x = u2 * m_uint32ToU * m_x[i];
264  if(m_y[i-1] + ((m_y[i] - m_y[i-1]) * random_half_open()) < GaussianPdfDenorm(x) ) {
265  return x * sign;
266  }
267  }
268 }
269 
270 float64_t CRandom::sample_tail() const
271 {
272  float64_t x, y;
273  do
274  {
275  x = -CMath::log(random_half_open()) / m_R;
277  } while(y+y < x*x);
278  return m_R + x;
279 }
280 
281 float64_t CRandom::GaussianPdfDenorm(float64_t x) const
282 {
283  return CMath::exp(-(x*x / 2.0));
284 }
285 
286 float64_t CRandom::GaussianPdfDenormInv(float64_t y) const
287 {
288  // Operates over the y range (0,1], which happens to be the y range of the pdf,
289  // with the exception that it does not include y=0, but we would never call with
290  // y=0 so it doesn't matter. Remember that a Gaussian effectively has a tail going
291  // off into x == infinity, hence asking what is x when y=0 is an invalid question
292  // in the context of this class.
293  return CMath::sqrt(-2.0 * CMath::log(y));
294 }
295 
296 void CRandom::reinit(uint32_t seed)
297 {
298  m_state_lock.lock();
299  m_seed = seed;
300  sfmt_init_gen_rand(m_sfmt_32, m_seed);
301  sfmt_init_gen_rand(m_sfmt_64, m_seed);
302  dsfmt_init_gen_rand(m_dsfmt, m_seed);
303  m_state_lock.unlock();
304 }
305 

SHOGUN Machine Learning Toolbox - Documentation