From a2fd89f963a7374b29f7831e67b443c3d42c6e3c Mon Sep 17 00:00:00 2001 From: Kevin Chabowski Date: Thu, 1 Aug 2013 22:53:27 +0200 Subject: Added SFMT prng. --- SFMT/html/howto-compile.html | 493 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 SFMT/html/howto-compile.html (limited to 'SFMT/html/howto-compile.html') diff --git a/SFMT/html/howto-compile.html b/SFMT/html/howto-compile.html new file mode 100644 index 0000000..8d08d1e --- /dev/null +++ b/SFMT/html/howto-compile.html @@ -0,0 +1,493 @@ + + + + + + How to compile SFMT + + + +

How to compile SFMT

+ +

+ This document explains how to compile SFMT for users of + UNIX-like systems (for example Linux, FreeBSD, + Cygwin, OS X, etc.) working in a terminal. I can't help those who use an IDE + (Integrated Development Environment); please see your IDE's help + to learn how to use the SIMD features of your CPU. +

+ +

1. First Step: Compile test programs using Makefile.

+

1-1. Compile standard C test program.

+

+ Check if SFMT.c and Makefile are in your current directory. + If not, cd to the directory where they exist. + Then, type +

+
+
make std
+
+

+ If it causes an error, try to type +

+
+
cc -DSFMT_MEXP=19937 -o test-std-M19937 test.c SFMT.c
+
+

+ or try to type +

+
+
gcc -DSFMT_MEXP=19937 -o test-std-M19937 test.c SFMT.c
+
+

+ If it succeeds, check the test program. Type +

+
+
./test-std-M19937 -b32
+
+

+ You will see many random numbers displayed on your screen. + If you want to check that these random numbers are the correct output, + redirect the output to a file and diff it with + SFMT.19937.out.txt, like this:

+
+
./test-std-M19937 -b32 > foo.txt
+diff -w foo.txt SFMT.19937.out.txt
+
+

+ Silence means they are the same, because diff + reports only the differences between two files. +

+

+ If you want to know the generation speed of SFMT, type +

+
+
./test-std-M19937 -s
+
+

+ It is very slow. To make it fast, compile it + with the -O3 option. If your compiler is gcc, you + should also specify the -fno-strict-aliasing option + together with -O3. Type +

+
+
gcc -O3 -fno-strict-aliasing -DSFMT_MEXP=19937 -o test-std-M19937 test.c SFMT.c
+./test-std-M19937 -s
+
+ +

1-2. Compile SSE2 test program.

+

+ If your CPU supports SSE2 and you can use gcc version 3.4 or later, + you can make test-sse2-Mxxx. To do this, type +

+
+
make sse2
+
+

or type

+
+
gcc -O3 -msse2 -fno-strict-aliasing -DHAVE_SSE2=1 -DSFMT_MEXP=19937 -o test-sse2-M19937 test.c SFMT.c
+
+

If everything works well,

+
+
./test-sse2-M19937 -s
+
+

will show much shorter time than test-std-M19937 -s.

+ + + +

1-4. Compile and check output automatically.

+

+ To make test program and check 32-bit output + automatically for all supported MEXPs of SFMT, type +

+
+
make std-check
+
+ +

+ To check test program optimized for SSE2, type +

+
+
make sse2-check
+
+ +

+ These commands may take some time. +

+ +

2. Second Step: Use SFMT pseudorandom number generator with + your C program.

+

2-1. Use sequential call and static link.

+

+ Here is a very simple program sample1.c which + calculates PI using Monte-Carlo method. +

+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "SFMT.h"
+
+/*
+ * sample1: estimate PI with the Monte-Carlo method.
+ *
+ * Draws NUM points (x, y) with sequential calls to sfmt_genrand_res53
+ * and counts the points that satisfy x*x + y*y < 1.  The optional first
+ * command-line argument is a decimal seed; 12345 is used when it is
+ * omitted.  Prints the estimate and returns 0.
+ */
+int main(int argc, char* argv[]) {
+    int i, cnt;
+    uint32_t seed;
+    double x, y, pi;
+    const int NUM = 10000;
+    sfmt_t sfmt;
+
+    if (argc >= 2) {
+	/* strtoul keeps the whole 32-bit seed range even where long is 32 bits;
+	   strtol would saturate every seed >= 2^31 to the same value there. */
+	seed = (uint32_t)strtoul(argv[1], NULL, 10);
+    } else {
+	seed = 12345;
+    }
+    cnt = 0;
+    sfmt_init_gen_rand(&sfmt, seed);
+    for (i = 0; i < NUM; i++) {
+	x = sfmt_genrand_res53(&sfmt);
+	y = sfmt_genrand_res53(&sfmt);
+	if (x * x + y * y < 1.0) {
+	    cnt++;
+	}
+    }
+    /* hit ratio approximates the quarter-circle area PI/4 */
+    pi = (double)cnt / NUM * 4;
+    printf("%lf\n", pi);
+    return 0;
+}
+      
+
+

To compile sample1.c with SFMT.c with the period of + $2^{607}-1$, type

+
+
gcc -O3 -DSFMT_MEXP=607 -o sample1 SFMT.c sample1.c
+
+ +

If your CPU supports SSE2 and you want to use optimized SFMT for + SSE2, type

+
+
gcc -O3 -msse2 -DHAVE_SSE2 -DSFMT_MEXP=607 -o sample1 SFMT.c sample1.c
+
+ + +

2-2. Use block call and static link.

+

+ Here is sample2.c which modifies sample1.c. + The block call sfmt_fill_array64 is much faster than + sequential calls, but it needs aligned memory. The standard function + to get aligned memory is posix_memalign, but + it isn't available on every OS. +

+
+
+#include <stdio.h>
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#include "SFMT.h"
+
+int main(int argc, char* argv[]) {
+    int i, j, cnt, seed;
+    double x, y, pi;
+    const int NUM = 10000;
+    const int R_SIZE = 2 * NUM;
+    int size;
+    uint64_t *array;
+    sfmt_t sfmt;
+
+    if (argc >= 2) {
+	seed = strtol(argv[1], NULL, 10);
+    } else {
+	seed = 12345;
+    }
+    size = sfmt_get_min_array_size64(&sfmt);
+    if (size < R_SIZE) {
+	size = R_SIZE;
+    }
+#if defined(__APPLE__) || \
+    (defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD__ <= 6)
+    printf("malloc used\n");
+    array = malloc(sizeof(double) * size);
+    if (array == NULL) {
+	printf("can't allocate memory.\n");
+	return 1;
+    }
+#elif defined(_POSIX_C_SOURCE)
+    printf("posix_memalign used\n");
+    if (posix_memalign((void **)&array, 16, sizeof(double) * size) != 0) {
+	printf("can't allocate memory.\n");
+	return 1;
+    }
+#elif defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+    printf("memalign used\n");
+    array = memalign(16, sizeof(double) * size);
+    if (array == NULL) {
+	printf("can't allocate memory.\n");
+	return 1;
+    }
+#else /* in this case, gcc doesn't support SSE2 */
+    printf("malloc used\n");
+    array = malloc(sizeof(double) * size);
+    if (array == NULL) {
+	printf("can't allocate memory.\n");
+	return 1;
+    }
+#endif
+    cnt = 0;
+    j = 0;
+    sfmt_init_gen_rand(&sfmt, seed);
+    sfmt_fill_array64(&sfmt, array, size);
+    for (i = 0; i < NUM; i++) {
+	x = sfmt_to_res53(array[j++]);
+	y = sfmt_to_res53(array[j++]);
+	if (x * x + y * y < 1.0) {
+	    cnt++;
+	}
+    }
+    free(array);
+    pi = (double)cnt / NUM * 4;
+    printf("%lf\n", pi);
+    return 0;
+}
+      
+
+

To compile sample2.c with SFMT.c with the period of + $2^{2281}-1$, type

+
+
gcc -O3 -DSFMT_MEXP=2281 -o sample2 SFMT.c sample2.c
+
+ +

If your CPU supports SSE2 and you want to use optimized SFMT for + SSE2, type

+
+
gcc -O3 -msse2 -DHAVE_SSE2 -DSFMT_MEXP=2281 -o sample2 SFMT.c sample2.c
+
+ + + +

2-4. Initialize SFMT using sfmt_init_by_array function.

+

+ Here is sample4.c which modifies sample1.c. + A 32-bit integer seed can only produce $2^{32}$ kinds of + initial state. To avoid this problem, SFMT + provides the sfmt_init_by_array function. This sample + uses sfmt_init_by_array, which initializes the internal state + array with an array of 32-bit integers. The size of the array can be + larger than the internal state array and all elements of the + array are used for initialization, but an overly large array is + wasteful. +

+
+
+#include <stdio.h>
+#include <string.h>
+#include "SFMT.h"
+
+/*
+ * sample4: estimate PI with the Monte-Carlo method, seeding from a string.
+ *
+ * Up to the first 100 bytes of the optional first command-line argument
+ * become the elements of the key array passed to sfmt_init_by_array;
+ * a single element 12345 is used when the argument is omitted.
+ */
+int main(int argc, char* argv[]) {
+    int i, cnt, seed_cnt;
+    double x, y, pi;
+    const int NUM = 10000;
+    uint32_t seeds[100];
+    sfmt_t sfmt;
+
+    if (argc >= 2) {
+	/* measure the argument once instead of on every loop iteration */
+	const size_t len = strlen(argv[1]);
+
+	seed_cnt = 0;
+	for (i = 0; (i < 100) && ((size_t)i < len); i++) {
+	    /* read bytes as unsigned char: plain char may be signed, which
+	       would make non-ASCII seeds differ between platforms */
+	    seeds[i] = (unsigned char)argv[1][i];
+	    seed_cnt++;
+	}
+    } else {
+	seeds[0] = 12345;
+	seed_cnt = 1;
+    }
+    cnt = 0;
+    sfmt_init_by_array(&sfmt, seeds, seed_cnt);
+    for (i = 0; i < NUM; i++) {
+	x = sfmt_genrand_res53(&sfmt);
+	y = sfmt_genrand_res53(&sfmt);
+	if (x * x + y * y < 1.0) {
+	    cnt++;
+	}
+    }
+    pi = (double)cnt / NUM * 4;
+    printf("%lf\n", pi);
+    return 0;
+}
+      
+
+

To compile sample4.c, type

+
+
gcc -O3 -DSFMT_MEXP=19937 -o sample4 SFMT.c sample4.c
+
+ +

Now, seed can be a string. Like this:

+
+
./sample4 your-full-name
+
+

Appendix: C preprocessor definitions

+

+ Here is a list of C preprocessor definitions that users can + specify to control code generation. Each macro is defined by writing its name + immediately after the -D compiler option (for example, -DSFMT_MEXP=19937). +

+
+
SFMT_MEXP
+
This macro is required. It specifies the Mersenne exponent, + and the period of the generated code will be $2^{\mathrm{SFMT\_MEXP}}-1$. + SFMT_MEXP must be one of 607, 1279, 2281, 4253, 11213, 19937, + 44497, 86243, 132049, 216091. +
+
HAVE_SSE2
+
This is optional. If this macro is specified, optimized code + for SSE2 will be generated.
+
HAVE_ALTIVEC
+
This is optional. If this macro is specified, optimized code + for AltiVec will be generated. This macro automatically turns on + BIG_ENDIAN64 macro. This macro of SFMT ver. 1.4 is not tested + at all.
+
BIG_ENDIAN64
+
This macro is required when your CPU is BIG ENDIAN and you + use 64-bit output. If __BIG_ENDIAN__ macro is defined, this macro + is automatically turned on. GCC defines __BIG_ENDIAN__ macro on + BIG ENDIAN CPUs. This macro of SFMT ver. 1.4 is not tested + at all.
+
ONLY64
+
This macro is optional. If this macro is specified, + optimized code for 64-bit output for BIG ENDIAN CPUs will be + generated and code for 32-bit output won't be + generated. BIG_ENDIAN64 macro must be specified with this macro + by user or automatically. This macro of SFMT ver. 1.4 is not tested + at all.
+
+ + + + + + +
| | 32-bit output | LITTLE ENDIAN 64-bit output | BIG ENDIAN 64-bit output |
| required | SFMT_MEXP | SFMT_MEXP | SFMT_MEXP, BIG_ENDIAN64 |
| optional | HAVE_SSE2, HAVE_ALTIVEC | HAVE_SSE2 | HAVE_ALTIVEC, ONLY64 |
+ + -- cgit v1.2.3-70-g09d2