Random number generator on GPU
Pseudo-random number generation on CPU is simple - we need to store previous state and use it to generate the next number.
On a GPU, it's need more attention, we need to generate random numbers in each instance independently.
That is, we need to keep for each instance a state, randomly initialized on CPU, and then update the state, using corresponding functions.
That is, we need to keep for each instance a state, randomly initialized on CPU, and then update the state, using corresponding functions.
The good explanation and particular implementation of good random generator is discussed here:
It's about method explained in NVidia's book "GPU Gems", Chapter 37. Efficient Random Number Generation and Application Using CUDA
https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch37.html
https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch37.html
So, here is my code:
//--------------------------------------------------------------------------
//Random generator for GPU
//Based on
//https://math.stackexchange.com/questions/337782/pseudo-random-number-generation-on-the-gpu#answers-header
//Note that the initial seed values for state should be larger than 128!
//typedef unsigned int uint; //32-bit unsigned integer
//struct uvec4 {
// uint x = 0;
// uint y = 0;
// uint z = 0;
// uint w = 0;
// uvec4() {}
// uvec4(uint x0, uint y0, uint z0, uint w0) {
// x = x0; y = y0; z = z0; w = w0;
// }
//};
//Usage:
/*
//one-time generation, such as particles starting position
//(Note, it works good for N=1..100 particles, for N>100000 - use two-level generation,
//that is create N/1000 seeds and then use them for create per 1000 particles on each, see example below
RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);
for (int i=0; i<N; i++) {
random2(rnd);
vec2 pos = rnd.value.xy;
pos.x *= w;
pos.y *= h;
worm[i].pos = vec4(pos,0,1);
}
//Generate many seeds
int N = 100;
Buffer<uvec4> states(N);
...
//Initialization
RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);
//states
for (int i=0; i<N; i++) {
uvec4 s;
random4(rnd);
s.x = int(rnd.value.x*10000)+150;
s.y = int(rnd.value.y*10000)+150;
s.z = int(rnd.value.z*10000)+150;
s.w = int(rnd.value.w*10000)+150;
states[i] = s;
}
//Using seeds for generating new values
//1) get seed
RandomState rnd;
rnd.state = states[i];
//2) generate values
random2(rnd);
float x = rnd.value.x * w;
float y = rnd.value.y * h;
//3) store updated seed
states[i] = rnd.state;
*/
//--------------------------------------------------------------
struct RandomState
{
uvec4 state; //16 bytes; Note that the initial seed values for state should be larger than 128!
vec4 value; //16 bytes
};
//--------------------------------------------------------------------------
//Random generator for GPU
uint TausStep(uint z, int S1, int S2, int S3, uint M)
{
uint b = (((z << S1) ^ z) >> S2);
return (((z & M) << S3) ^ b);
}
uint LCGStep(uint z, uint A, uint C)
{
return (A * z + C);
}
//------------------------------------------------
//Generate 1,2,3,4 random values, stored in value.x,y,z,w
void random1(inout RandomState rnd)
{
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}
void random2(inout RandomState rnd)
{
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}
void random3(inout RandomState rnd)
{
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}
void random4(inout RandomState rnd)
{
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);
rnd.value.w = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}
Actually, seeds initialization can be made in GPU too :)
ReplyDelete