Random number generator on GPU

Pseudo-random number generation on CPU is simple - we need to store previous state and use it to generate the next number.
On a GPU, it's need more attention, we need to generate random numbers in each instance independently.

That is, we need to keep for each instance a state, randomly initialized on CPU, and then update the state, using corresponding functions.

The good explanation and particular implementation of good random generator is discussed here:

It's about method explained in NVidia's book "GPU Gems", Chapter 37. Efficient Random Number Generation and Application Using CUDA
https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch37.html

So, here is my code:

//--------------------------------------------------------------------------
//Random generator for GPU
//Based on
//https://math.stackexchange.com/questions/337782/pseudo-random-number-generation-on-the-gpu#answers-header
//Note that the initial seed values for state should be larger than 128!

//typedef unsigned int uint; //32-bit unsigned integer
//struct uvec4 {
// uint x = 0;
// uint y = 0;
// uint z = 0;
// uint w = 0;
// uvec4() {}
// uvec4(uint x0, uint y0, uint z0, uint w0) {
// x = x0; y = y0; z = z0; w = w0;
// }
//};

//Usage:
/*

//one-time generation, such as particles starting position
//(Note, it works good for N=1..100 particles, for N>100000 - use two-level generation,
//that is create N/1000 seeds and then use them for create per 1000 particles on each, see example below

RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);

for (int i=0; i<N; i++) {
  random2(rnd);

  vec2 pos = rnd.value.xy;
  pos.x *= w;
  pos.y *= h;

  worm[i].pos = vec4(pos,0,1);
}


//Generate many seeds
int N = 100;

Buffer<uvec4> states(N);
...

//Initialization
RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);

//states
for (int i=0; i<N; i++) {
  uvec4 s;
  random4(rnd);
  s.x = int(rnd.value.x*10000)+150;
  s.y = int(rnd.value.y*10000)+150;
  s.z = int(rnd.value.z*10000)+150;
  s.w = int(rnd.value.w*10000)+150;
  states[i] = s;
}

//Using seeds for generating new values
//1) get seed
RandomState rnd;
rnd.state = states[i];

//2) generate values
random2(rnd);
float x = rnd.value.x * w;
float y = rnd.value.y * h;

//3) store updated seed
states[i] = rnd.state;

*/


//--------------------------------------------------------------


struct RandomState
{
  uvec4 state; //16 bytes; Note that the initial seed values for state should be larger than 128!
  vec4 value; //16 bytes
};

//--------------------------------------------------------------------------
//Random generator for GPU

uint TausStep(uint z, int S1, int S2, int S3, uint M)
{
  uint b = (((z << S1) ^ z) >> S2);
  return (((z & M) << S3) ^ b);
}

uint LCGStep(uint z, uint A, uint C)
{
  return (A * z + C);
}

//------------------------------------------------
//Generate 1,2,3,4 random values, stored in value.x,y,z,w
void random1(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random2(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random3(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random4(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.w = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}


Comments

Post a Comment

Popular posts from this blog

Computing ray origin and direction from Model View Projection matrices for raymarching

Forward and backward alpha blending for raymarching

Forward, Deferred and Raytracing rendering in openFrameworks and web