Random number generator on GPU

Pseudo-random number generation on CPU is simple - we need to store previous state and use it to generate the next number.
On a GPU, it's need more attention, we need to generate random numbers in each instance independently.

That is, we need to keep for each instance a state, randomly initialized on CPU, and then update the state, using corresponding functions.

The good explanation and particular implementation of good random generator is discussed here:

It's about method explained in NVidia's book "GPU Gems", Chapter 37. Efficient Random Number Generation and Application Using CUDA
https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch37.html

So, here is my code:

//--------------------------------------------------------------------------
//Random generator for GPU
//Based on
//https://math.stackexchange.com/questions/337782/pseudo-random-number-generation-on-the-gpu#answers-header
//Note that the initial seed values for state should be larger than 128!

//typedef unsigned int uint; //32-bit unsigned integer
//struct uvec4 {
// uint x = 0;
// uint y = 0;
// uint z = 0;
// uint w = 0;
// uvec4() {}
// uvec4(uint x0, uint y0, uint z0, uint w0) {
// x = x0; y = y0; z = z0; w = w0;
// }
//};

//Usage:
/*

//one-time generation, such as particles starting position
//(Note, it works good for N=1..100 particles, for N>100000 - use two-level generation,
//that is create N/1000 seeds and then use them for create per 1000 particles on each, see example below

RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);

for (int i=0; i<N; i++) {
  random2(rnd);

  vec2 pos = rnd.value.xy;
  pos.x *= w;
  pos.y *= h;

  worm[i].pos = vec4(pos,0,1);
}


//Generate many seeds
int N = 100;

Buffer<uvec4> states(N);
...

//Initialization
RandomState rnd;
rnd.state = uvec4(1884,2991,222,334);

//states
for (int i=0; i<N; i++) {
  uvec4 s;
  random4(rnd);
  s.x = int(rnd.value.x*10000)+150;
  s.y = int(rnd.value.y*10000)+150;
  s.z = int(rnd.value.z*10000)+150;
  s.w = int(rnd.value.w*10000)+150;
  states[i] = s;
}

//Using seeds for generating new values
//1) get seed
RandomState rnd;
rnd.state = states[i];

//2) generate values
random2(rnd);
float x = rnd.value.x * w;
float y = rnd.value.y * h;

//3) store updated seed
states[i] = rnd.state;

*/


//--------------------------------------------------------------


struct RandomState
{
  uvec4 state; //16 bytes; Note that the initial seed values for state should be larger than 128!
  vec4 value; //16 bytes
};

//--------------------------------------------------------------------------
//Random generator for GPU

uint TausStep(uint z, int S1, int S2, int S3, uint M)
{
  uint b = (((z << S1) ^ z) >> S2);
  return (((z & M) << S3) ^ b);
}

uint LCGStep(uint z, uint A, uint C)
{
  return (A * z + C);
}

//------------------------------------------------
//Generate 1,2,3,4 random values, stored in value.x,y,z,w
void random1(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random2(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random3(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

}

void random4(inout RandomState rnd)
{
  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.x = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.y = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.z = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));

  rnd.state.x = TausStep(rnd.state.x, 13, 19, 12, 4294967294);
  rnd.state.y = TausStep(rnd.state.y, 2, 25, 4, 4294967288);
  rnd.state.z = TausStep(rnd.state.z, 3, 11, 17, 4294967280);
  rnd.state.w = LCGStep(rnd.state.w, 1664525, 1013904223);

  rnd.value.w = float(2.3283064365387e-10 * double(rnd.state.x ^ rnd.state.y ^ rnd.state.z ^ rnd.state.w));
}


Comments

Post a Comment

Popular posts from this blog

Computing ray origin and direction from Model View Projection matrices for raymarching

Create Blueprint Library, print to log and using windows.h in a Unreal Engine C++ project

Forward, Deferred and Raytracing rendering in openFrameworks and web