3 * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT)
4 * based on IEEE 754 format.
6 * @author Mutsuo Saito (Hiroshima University)
7 * @author Makoto Matsumoto (Hiroshima University)
9 * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima
10 * University. All rights reserved.
12 * The new BSD License is applied to this software, see LICENSE.txt
17 #include "dSFMT-params.h"
18 #include "dSFMT-common.h"
20 #if defined(__cplusplus)
24 /** dsfmt internal state vector */
25 dsfmt_t dsfmt_global_data;
26 /** dsfmt mexp for check */
27 static const int dsfmt_mexp = DSFMT_MEXP;
32 inline static uint32_t ini_func1(uint32_t x);
33 inline static uint32_t ini_func2(uint32_t x);
34 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
36 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
38 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
40 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
42 inline static int idxof(int i);
43 static void initial_mask(dsfmt_t *dsfmt);
44 static void period_certification(dsfmt_t *dsfmt);
46 #if defined(HAVE_SSE2)
47 /** 1 in 64bit for sse2 */
48 static const union X128I_T sse2_int_one = {{1, 1}};
49 /** 2.0 double for sse2 */
50 static const union X128D_T sse2_double_two = {{2.0, 2.0}};
51 /** -1.0 double for sse2 */
52 static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}};
56 * This function simulate a 32-bit array index overlapped to 64-bit
57 * array of LITTLE ENDIAN in BIG ENDIAN machine.
59 #if defined(DSFMT_BIG_ENDIAN)
60 inline static int idxof(int i) {
64 inline static int idxof(int i) {
69 #if defined(HAVE_SSE2)
71 * This function converts the double precision floating point numbers which
72 * distribute uniformly in the range [1, 2) to those which distribute uniformly
73 * in the range [0, 1).
74 * @param w 128bit stracture of double precision floating point numbers (I/O)
76 inline static void convert_c0o1(w128_t *w) {
77 w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128);
81 * This function converts the double precision floating point numbers which
82 * distribute uniformly in the range [1, 2) to those which distribute uniformly
83 * in the range (0, 1].
84 * @param w 128bit stracture of double precision floating point numbers (I/O)
86 inline static void convert_o0c1(w128_t *w) {
87 w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd);
91 * This function converts the double precision floating point numbers which
92 * distribute uniformly in the range [1, 2) to those which distribute uniformly
93 * in the range (0, 1).
94 * @param w 128bit stracture of double precision floating point numbers (I/O)
96 inline static void convert_o0o1(w128_t *w) {
97 w->si = _mm_or_si128(w->si, sse2_int_one.i128);
98 w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128);
100 #else /* standard C and altivec */
102 * This function converts the double precision floating point numbers which
103 * distribute uniformly in the range [1, 2) to those which distribute uniformly
104 * in the range [0, 1).
105 * @param w 128bit stracture of double precision floating point numbers (I/O)
107 inline static void convert_c0o1(w128_t *w) {
113 * This function converts the double precision floating point numbers which
114 * distribute uniformly in the range [1, 2) to those which distribute uniformly
115 * in the range (0, 1].
116 * @param w 128bit stracture of double precision floating point numbers (I/O)
118 inline static void convert_o0c1(w128_t *w) {
119 w->d[0] = 2.0 - w->d[0];
120 w->d[1] = 2.0 - w->d[1];
124 * This function converts the double precision floating point numbers which
125 * distribute uniformly in the range [1, 2) to those which distribute uniformly
126 * in the range (0, 1).
127 * @param w 128bit stracture of double precision floating point numbers (I/O)
129 inline static void convert_o0o1(w128_t *w) {
138 * This function fills the user-specified array with double precision
139 * floating point pseudorandom numbers of the IEEE 754 format.
140 * @param dsfmt dsfmt state vector.
141 * @param array an 128-bit array to be filled by pseudorandom numbers.
142 * @param size number of 128-bit pseudorandom numbers to be generated.
144 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
149 lung = dsfmt->status[DSFMT_N];
150 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
152 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
153 do_recursion(&array[i], &dsfmt->status[i],
154 &dsfmt->status[i + DSFMT_POS1], &lung);
156 for (; i < DSFMT_N; i++) {
157 do_recursion(&array[i], &dsfmt->status[i],
158 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
160 for (; i < size - DSFMT_N; i++) {
161 do_recursion(&array[i], &array[i - DSFMT_N],
162 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
164 for (j = 0; j < 2 * DSFMT_N - size; j++) {
165 dsfmt->status[j] = array[j + size - DSFMT_N];
167 for (; i < size; i++, j++) {
168 do_recursion(&array[i], &array[i - DSFMT_N],
169 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
170 dsfmt->status[j] = array[i];
172 dsfmt->status[DSFMT_N] = lung;
176 * This function fills the user-specified array with double precision
177 * floating point pseudorandom numbers of the IEEE 754 format.
178 * @param dsfmt dsfmt state vector.
179 * @param array an 128-bit array to be filled by pseudorandom numbers.
180 * @param size number of 128-bit pseudorandom numbers to be generated.
182 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
187 lung = dsfmt->status[DSFMT_N];
188 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
190 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
191 do_recursion(&array[i], &dsfmt->status[i],
192 &dsfmt->status[i + DSFMT_POS1], &lung);
194 for (; i < DSFMT_N; i++) {
195 do_recursion(&array[i], &dsfmt->status[i],
196 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
198 for (; i < size - DSFMT_N; i++) {
199 do_recursion(&array[i], &array[i - DSFMT_N],
200 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
201 convert_c0o1(&array[i - DSFMT_N]);
203 for (j = 0; j < 2 * DSFMT_N - size; j++) {
204 dsfmt->status[j] = array[j + size - DSFMT_N];
206 for (; i < size; i++, j++) {
207 do_recursion(&array[i], &array[i - DSFMT_N],
208 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
209 dsfmt->status[j] = array[i];
210 convert_c0o1(&array[i - DSFMT_N]);
212 for (i = size - DSFMT_N; i < size; i++) {
213 convert_c0o1(&array[i]);
215 dsfmt->status[DSFMT_N] = lung;
219 * This function fills the user-specified array with double precision
220 * floating point pseudorandom numbers of the IEEE 754 format.
221 * @param dsfmt dsfmt state vector.
222 * @param array an 128-bit array to be filled by pseudorandom numbers.
223 * @param size number of 128-bit pseudorandom numbers to be generated.
225 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
230 lung = dsfmt->status[DSFMT_N];
231 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
233 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
234 do_recursion(&array[i], &dsfmt->status[i],
235 &dsfmt->status[i + DSFMT_POS1], &lung);
237 for (; i < DSFMT_N; i++) {
238 do_recursion(&array[i], &dsfmt->status[i],
239 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
241 for (; i < size - DSFMT_N; i++) {
242 do_recursion(&array[i], &array[i - DSFMT_N],
243 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
244 convert_o0o1(&array[i - DSFMT_N]);
246 for (j = 0; j < 2 * DSFMT_N - size; j++) {
247 dsfmt->status[j] = array[j + size - DSFMT_N];
249 for (; i < size; i++, j++) {
250 do_recursion(&array[i], &array[i - DSFMT_N],
251 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
252 dsfmt->status[j] = array[i];
253 convert_o0o1(&array[i - DSFMT_N]);
255 for (i = size - DSFMT_N; i < size; i++) {
256 convert_o0o1(&array[i]);
258 dsfmt->status[DSFMT_N] = lung;
262 * This function fills the user-specified array with double precision
263 * floating point pseudorandom numbers of the IEEE 754 format.
264 * @param dsfmt dsfmt state vector.
265 * @param array an 128-bit array to be filled by pseudorandom numbers.
266 * @param size number of 128-bit pseudorandom numbers to be generated.
268 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
273 lung = dsfmt->status[DSFMT_N];
274 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
276 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
277 do_recursion(&array[i], &dsfmt->status[i],
278 &dsfmt->status[i + DSFMT_POS1], &lung);
280 for (; i < DSFMT_N; i++) {
281 do_recursion(&array[i], &dsfmt->status[i],
282 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
284 for (; i < size - DSFMT_N; i++) {
285 do_recursion(&array[i], &array[i - DSFMT_N],
286 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
287 convert_o0c1(&array[i - DSFMT_N]);
289 for (j = 0; j < 2 * DSFMT_N - size; j++) {
290 dsfmt->status[j] = array[j + size - DSFMT_N];
292 for (; i < size; i++, j++) {
293 do_recursion(&array[i], &array[i - DSFMT_N],
294 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
295 dsfmt->status[j] = array[i];
296 convert_o0c1(&array[i - DSFMT_N]);
298 for (i = size - DSFMT_N; i < size; i++) {
299 convert_o0c1(&array[i]);
301 dsfmt->status[DSFMT_N] = lung;
305 * This function represents a function used in the initialization
307 * @param x 32-bit integer
308 * @return 32-bit integer
310 static uint32_t ini_func1(uint32_t x) {
311 return (x ^ (x >> 27)) * (uint32_t)1664525UL;
315 * This function represents a function used in the initialization
317 * @param x 32-bit integer
318 * @return 32-bit integer
320 static uint32_t ini_func2(uint32_t x) {
321 return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
325 * This function initializes the internal state array to fit the IEEE
327 * @param dsfmt dsfmt state vector.
329 static void initial_mask(dsfmt_t *dsfmt) {
333 psfmt = &dsfmt->status[0].u[0];
334 for (i = 0; i < DSFMT_N * 2; i++) {
335 psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
340 * This function certificate the period of 2^{SFMT_MEXP}-1.
341 * @param dsfmt dsfmt state vector.
343 static void period_certification(dsfmt_t *dsfmt) {
344 uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
348 #if (DSFMT_PCV2 & 1) != 1
353 tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
354 tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
356 inner = tmp[0] & pcv[0];
357 inner ^= tmp[1] & pcv[1];
358 for (i = 32; i > 0; i >>= 1) {
366 /* check NG, and modification */
367 #if (DSFMT_PCV2 & 1) == 1
368 dsfmt->status[DSFMT_N].u[1] ^= 1;
370 for (i = 1; i >= 0; i--) {
372 for (j = 0; j < 64; j++) {
373 if ((work & pcv[i]) != 0) {
374 dsfmt->status[DSFMT_N].u[i] ^= work;
388 * This function returns the identification string. The string shows
389 * the Mersenne exponent, and all parameters of this generator.
392 const char *dsfmt_get_idstring(void) {
397 * This function returns the minimum size of array used for \b
398 * fill_array functions.
399 * @return minimum size of array used for fill_array functions.
401 int dsfmt_get_min_array_size(void) {
406 * This function fills the internal state array with double precision
407 * floating point pseudorandom numbers of the IEEE 754 format.
408 * @param dsfmt dsfmt state vector.
410 void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
414 lung = dsfmt->status[DSFMT_N];
415 do_recursion(&dsfmt->status[0], &dsfmt->status[0],
416 &dsfmt->status[DSFMT_POS1], &lung);
417 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
418 do_recursion(&dsfmt->status[i], &dsfmt->status[i],
419 &dsfmt->status[i + DSFMT_POS1], &lung);
421 for (; i < DSFMT_N; i++) {
422 do_recursion(&dsfmt->status[i], &dsfmt->status[i],
423 &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
425 dsfmt->status[DSFMT_N] = lung;
429 * This function generates double precision floating point
430 * pseudorandom numbers which distribute in the range [1, 2) to the
431 * specified array[] by one call. The number of pseudorandom numbers
432 * is specified by the argument \b size, which must be at least (SFMT_MEXP
433 * / 128) * 2 and a multiple of two. The function
434 * get_min_array_size() returns this minimum size. The generation by
435 * this function is much faster than the following fill_array_xxx functions.
437 * For initialization, init_gen_rand() or init_by_array() must be called
438 * before the first call of this function. This function can not be
439 * used after calling genrand_xxx functions, without initialization.
441 * @param dsfmt dsfmt state vector.
442 * @param array an array where pseudorandom numbers are filled
443 * by this function. The pointer to the array must be "aligned"
444 * (namely, must be a multiple of 16) in the SIMD version, since it
445 * refers to the address of a 128-bit integer. In the standard C
446 * version, the pointer is arbitrary.
448 * @param size the number of 64-bit pseudorandom integers to be
449 * generated. size must be a multiple of 2, and greater than or equal
450 * to (SFMT_MEXP / 128) * 2.
452 * @note \b memalign or \b posix_memalign is available to get aligned
453 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
454 * returns the pointer to the aligned memory block.
456 void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
457 assert(size % 2 == 0);
458 assert(size >= DSFMT_N64);
459 gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
463 * This function generates double precision floating point
464 * pseudorandom numbers which distribute in the range (0, 1] to the
465 * specified array[] by one call. This function is the same as
466 * fill_array_close1_open2() except the distribution range.
468 * @param dsfmt dsfmt state vector.
469 * @param array an array where pseudorandom numbers are filled
471 * @param size the number of pseudorandom numbers to be generated.
472 * see also \sa fill_array_close1_open2()
474 void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
475 assert(size % 2 == 0);
476 assert(size >= DSFMT_N64);
477 gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
481 * This function generates double precision floating point
482 * pseudorandom numbers which distribute in the range [0, 1) to the
483 * specified array[] by one call. This function is the same as
484 * fill_array_close1_open2() except the distribution range.
486 * @param array an array where pseudorandom numbers are filled
488 * @param dsfmt dsfmt state vector.
489 * @param size the number of pseudorandom numbers to be generated.
490 * see also \sa fill_array_close1_open2()
492 void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
493 assert(size % 2 == 0);
494 assert(size >= DSFMT_N64);
495 gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
499 * This function generates double precision floating point
500 * pseudorandom numbers which distribute in the range (0, 1) to the
501 * specified array[] by one call. This function is the same as
502 * fill_array_close1_open2() except the distribution range.
504 * @param dsfmt dsfmt state vector.
505 * @param array an array where pseudorandom numbers are filled
507 * @param size the number of pseudorandom numbers to be generated.
508 * see also \sa fill_array_close1_open2()
510 void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
511 assert(size % 2 == 0);
512 assert(size >= DSFMT_N64);
513 gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
516 #if defined(__INTEL_COMPILER)
517 # pragma warning(disable:981)
520 * This function initializes the internal state array with a 32-bit
522 * @param dsfmt dsfmt state vector.
523 * @param seed a 32-bit integer used as the seed.
524 * @param mexp caller's mersenne expornent
526 void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
530 /* make sure caller program is compiled with the same MEXP */
531 if (mexp != dsfmt_mexp) {
532 fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
535 psfmt = &dsfmt->status[0].u32[0];
536 psfmt[idxof(0)] = seed;
537 for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
538 psfmt[idxof(i)] = 1812433253UL
539 * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
542 period_certification(dsfmt);
543 dsfmt->idx = DSFMT_N64;
547 * This function initializes the internal state array,
548 * with an array of 32-bit integers used as the seeds
549 * @param dsfmt dsfmt state vector.
550 * @param init_key the array of 32-bit integers, used as a seed.
551 * @param key_length the length of init_key.
552 * @param mexp caller's mersenne expornent
554 void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
555 int key_length, int mexp) {
561 int size = (DSFMT_N + 1) * 4; /* pulmonary */
563 /* make sure caller program is compiled with the same MEXP */
564 if (mexp != dsfmt_mexp) {
565 fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
570 } else if (size >= 68) {
572 } else if (size >= 39) {
577 mid = (size - lag) / 2;
579 psfmt32 = &dsfmt->status[0].u32[0];
580 memset(dsfmt->status, 0x8b, sizeof(dsfmt->status));
581 if (key_length + 1 > size) {
582 count = key_length + 1;
586 r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)]
587 ^ psfmt32[idxof((size - 1) % size)]);
588 psfmt32[idxof(mid % size)] += r;
590 psfmt32[idxof((mid + lag) % size)] += r;
591 psfmt32[idxof(0)] = r;
593 for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
594 r = ini_func1(psfmt32[idxof(i)]
595 ^ psfmt32[idxof((i + mid) % size)]
596 ^ psfmt32[idxof((i + size - 1) % size)]);
597 psfmt32[idxof((i + mid) % size)] += r;
598 r += init_key[j] + i;
599 psfmt32[idxof((i + mid + lag) % size)] += r;
600 psfmt32[idxof(i)] = r;
603 for (; j < count; j++) {
604 r = ini_func1(psfmt32[idxof(i)]
605 ^ psfmt32[idxof((i + mid) % size)]
606 ^ psfmt32[idxof((i + size - 1) % size)]);
607 psfmt32[idxof((i + mid) % size)] += r;
609 psfmt32[idxof((i + mid + lag) % size)] += r;
610 psfmt32[idxof(i)] = r;
613 for (j = 0; j < size; j++) {
614 r = ini_func2(psfmt32[idxof(i)]
615 + psfmt32[idxof((i + mid) % size)]
616 + psfmt32[idxof((i + size - 1) % size)]);
617 psfmt32[idxof((i + mid) % size)] ^= r;
619 psfmt32[idxof((i + mid + lag) % size)] ^= r;
620 psfmt32[idxof(i)] = r;
624 period_certification(dsfmt);
625 dsfmt->idx = DSFMT_N64;
627 #if defined(__INTEL_COMPILER)
628 # pragma warning(default:981)
631 #if defined(__cplusplus)