X-Git-Url: http://git.osdn.jp/view?a=blobdiff_plain;f=psychlops%2Fcore%2Fmath%2FdSFMT2.0%2FdSFMT.c;h=e86449ac32afa9dae9da3fae38e4bec48defeccc;hb=00d1c2a000216184893b59166c09927d2fd4deba;hp=bd9d157f0b16dd24a3bec2670756611a15476627;hpb=3eaa8d4d651064b4dc33cc86faa55b30b317d1b4;p=psychlops%2Fcpp.git diff --git a/psychlops/core/math/dSFMT2.0/dSFMT.c b/psychlops/core/math/dSFMT2.0/dSFMT.c index bd9d157..e86449a 100644 --- a/psychlops/core/math/dSFMT2.0/dSFMT.c +++ b/psychlops/core/math/dSFMT2.0/dSFMT.c @@ -15,6 +15,11 @@ #include #include #include "dSFMT-params.h" +#include "dSFMT-common.h" + +#if defined(__cplusplus) +extern "C" { +#endif /** dsfmt internal state vector */ dsfmt_t dsfmt_global_data; @@ -26,30 +31,25 @@ static const int dsfmt_mexp = DSFMT_MEXP; ----------------*/ inline static uint32_t ini_func1(uint32_t x); inline static uint32_t ini_func2(uint32_t x); -inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size); -inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size); -inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size); -inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size); inline static int idxof(int i); static void initial_mask(dsfmt_t *dsfmt); static void period_certification(dsfmt_t *dsfmt); #if defined(HAVE_SSE2) -# include -/** mask data for sse2 */ -static __m128i sse2_param_mask; /** 1 in 64bit for sse2 */ -static __m128i sse2_int_one; +static const union X128I_T sse2_int_one = {{1, 1}}; /** 2.0 double for sse2 */ -static __m128d sse2_double_two; +static const union X128D_T sse2_double_two = {{2.0, 2.0}}; /** -1.0 double for sse2 */ -static __m128d sse2_double_m_one; - -static void setup_const(void); +static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}}; #endif /** @@ -66,105 +66,6 @@ inline static int idxof(int i) { } #endif -/** - * This function represents the recursion formula. - * @param r output - * @param a a 128-bit part of the internal state array - * @param b a 128-bit part of the internal state array - * @param lung a 128-bit part of the internal state array - */ -#if defined(HAVE_ALTIVEC) -inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, - w128_t *lung) { - const vector unsigned char sl1 = ALTI_SL1; - const vector unsigned char sl1_perm = ALTI_SL1_PERM; - const vector unsigned int sl1_msk = ALTI_SL1_MSK; - const vector unsigned char sr1 = ALTI_SR; - const vector unsigned char sr1_perm = ALTI_SR_PERM; - const vector unsigned int sr1_msk = ALTI_SR_MSK; - const vector unsigned char perm = ALTI_PERM; - const vector unsigned int msk1 = ALTI_MSK; - vector unsigned int w, x, y, z; - - z = a->s; - w = lung->s; - x = vec_perm(w, (vector unsigned int)perm, perm); - y = vec_perm(z, sl1_perm, sl1_perm); - y = vec_sll(y, sl1); - y = vec_and(y, sl1_msk); - w = vec_xor(x, b->s); - w = vec_xor(w, y); - x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); - x = vec_srl(x, sr1); - x = vec_and(x, sr1_msk); - y = vec_and(w, msk1); - z = vec_xor(z, y); - r->s = vec_xor(z, x); - lung->s = w; -} -#elif defined(HAVE_SSE2) -/** - * This function setup some constant variables for SSE2. - */ -static void setup_const(void) { - static int first = 1; - if (!first) { - return; - } - sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4, - DSFMT_MSK32_1, DSFMT_MSK32_2); - sse2_int_one = _mm_set_epi32(0, 1, 0, 1); - sse2_double_two = _mm_set_pd(2.0, 2.0); - sse2_double_m_one = _mm_set_pd(-1.0, -1.0); - first = 0; -} - -/** - * This function represents the recursion formula. - * @param r output 128-bit - * @param a a 128-bit part of the internal state array - * @param b a 128-bit part of the internal state array - * @param d a 128-bit part of the internal state array (I/O) - */ -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { - __m128i v, w, x, y, z; - - x = a->si; - z = _mm_slli_epi64(x, DSFMT_SL1); - y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); - z = _mm_xor_si128(z, b->si); - y = _mm_xor_si128(y, z); - - v = _mm_srli_epi64(y, DSFMT_SR); - w = _mm_and_si128(y, sse2_param_mask); - v = _mm_xor_si128(v, x); - v = _mm_xor_si128(v, w); - r->si = v; - u->si = y; -} -#else /* standard C */ -/** - * This function represents the recursion formula. - * @param r output 128-bit - * @param a a 128-bit part of the internal state array - * @param b a 128-bit part of the internal state array - * @param lung a 128-bit part of the internal state array (I/O) - */ -inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, - w128_t *lung) { - uint64_t t0, t1, L0, L1; - - t0 = a->u[0]; - t1 = a->u[1]; - L0 = lung->u[0]; - L1 = lung->u[1]; - lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; - lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; - r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; - r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; -} -#endif - #if defined(HAVE_SSE2) /** * This function converts the double precision floating point numbers which @@ -173,7 +74,7 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, * @param w 128bit stracture of double precision floating point numbers (I/O) */ inline static void convert_c0o1(w128_t *w) { - w->sd = _mm_add_pd(w->sd, sse2_double_m_one); + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); } /** @@ -183,7 +84,7 @@ inline static void convert_c0o1(w128_t *w) { * @param w 128bit stracture of double precision floating point numbers (I/O) */ inline static void convert_o0c1(w128_t *w) { - w->sd = _mm_sub_pd(sse2_double_two, w->sd); + w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd); } /** @@ -193,8 +94,8 @@ inline static void convert_o0c1(w128_t *w) { * @param w 128bit stracture of double precision floating point numbers (I/O) */ inline static void convert_o0o1(w128_t *w) { - w->si = _mm_or_si128(w->si, sse2_int_one); - w->sd = _mm_add_pd(w->sd, sse2_double_m_one); + w->si = _mm_or_si128(w->si, sse2_int_one.i128); + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); } #else /* standard C and altivec */ /** @@ -240,7 +141,7 @@ inline static void convert_o0o1(w128_t *w) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size) { int i, j; w128_t lung; @@ -278,7 +179,7 @@ inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t array[], * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size) { int i, j; w128_t lung; @@ -321,7 +222,7 @@ inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t array[], * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size) { int i, j; w128_t lung; @@ -364,7 +265,7 @@ inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t array[], * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t array[], +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size) { int i, j; w128_t lung; @@ -440,10 +341,14 @@ static void initial_mask(dsfmt_t *dsfmt) { * @param dsfmt dsfmt state vector. */ static void period_certification(dsfmt_t *dsfmt) { - int i, j; uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; uint64_t tmp[2]; uint64_t inner; + int i; +#if (DSFMT_PCV2 & 1) != 1 + int j; + uint64_t work; +#endif tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); @@ -462,7 +367,6 @@ static void period_certification(dsfmt_t *dsfmt) { #if (DSFMT_PCV2 & 1) == 1 dsfmt->status[DSFMT_N].u[1] ^= 1; #else - uint64_t work; for (i = 1; i >= 0; i--) { work = 1; for (j = 0; j < 64; j++) { @@ -637,9 +541,6 @@ void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { initial_mask(dsfmt); period_certification(dsfmt); dsfmt->idx = DSFMT_N64; -#if defined(HAVE_SSE2) - setup_const(); -#endif } /** @@ -688,7 +589,6 @@ void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], r += key_length; psfmt32[idxof((mid + lag) % size)] += r; psfmt32[idxof(0)] = r; - i = 1; count--; for (i = 1, j = 0; (j < count) && (j < key_length); j++) { r = ini_func1(psfmt32[idxof(i)] @@ -723,10 +623,11 @@ void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], initial_mask(dsfmt); period_certification(dsfmt); dsfmt->idx = DSFMT_N64; -#if defined(HAVE_SSE2) - setup_const(); -#endif } #if defined(__INTEL_COMPILER) # pragma warning(default:981) #endif + +#if defined(__cplusplus) +} +#endif