Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move RNG in runtime/random_numbers.cu to use Array instead of uint4/uint2. #3738

Open
wants to merge 1 commit into
base: array_use
Choose a base branch
from

Conversation

csarofeen
Copy link
Collaborator

No description provided.

@csarofeen
Copy link
Collaborator Author

!test

Copy link

PR Reviewer Guide 🔍

Here are some key observations to aid the review process:

⏱️ Estimated effort to review: 3 🔵🔵🔵⚪⚪
🧪 No relevant tests
⚡ Recommended focus areas for review

Possible Logic Change

The function genPrologue() has been modified to use Array<uint32_t, 4> instead of uint4 for the rng_result variable. This change may affect the logic of the function and should be reviewed carefully.

indent() << "Array<uint32_t, 4> rng_result;\n";
Possible Logic Change

The functions single_round() and philox() have been modified to use Array<uint32_t, 4> and Array<uint32_t, 2> instead of uint4 and uint2 respectively. These changes may affect the logic of the functions and should be reviewed carefully.

// Performs a single mixing round on the 4-word counter `ctr` using the 2-word
// `key`, and returns the resulting 4-word counter.
//
// Words 0 and 2 of `ctr` are each multiplied by a fixed round multiplier via
// mulhilo32 (defined elsewhere; presumably it returns the low 32 bits of the
// product and stores the high 32 bits through its pointer argument -- confirm
// against its definition). The "high" results are XOR-ed with the remaining
// counter words and the key words; the "low" results are passed through as-is.
__device__ Array<uint32_t, 4> single_round(Array<uint32_t, 4> ctr, Array<uint32_t, 2> key) {
  constexpr unsigned long kPhiloxSA = 0xD2511F53;
  constexpr unsigned long kPhiloxSB = 0xCD9E8D57;

  // Product of the first multiplier with counter word 0.
  unsigned int high_a;
  const unsigned int low_a = mulhilo32(kPhiloxSA, ctr[0], &high_a);

  // Product of the second multiplier with counter word 2.
  unsigned int high_b;
  const unsigned int low_b = mulhilo32(kPhiloxSB, ctr[2], &high_b);

  // Fold the untouched counter words (1 and 3) and the key into the high parts.
  const unsigned int mixed_0 = high_b ^ ctr[1] ^ key[0];
  const unsigned int mixed_2 = high_a ^ ctr[3] ^ key[1];

  Array<uint32_t, 4> next = {mixed_0, low_b, mixed_2, low_a};
  return next;
}

// Produces four 32-bit words from (seed, subsequence, offset) by running ten
// applications of single_round.
//
// - The 64-bit `seed` is split into the two key words (low half first).
// - The 64-bit `offset` fills counter words 0-1 and the 64-bit `subsequence`
//   fills counter words 2-3 (low half first in each pair).
// - Between consecutive rounds the two key words are advanced by the fixed
//   increments kPhilox10A / kPhilox10B; the first round uses the key as
//   derived from the seed, so the key is bumped nine times in total.
__device__ Array<uint32_t, 4> philox(
    unsigned long long seed,
    unsigned long long subsequence,
    unsigned long long offset) {
  constexpr unsigned long kPhilox10A = 0x9E3779B9;
  constexpr unsigned long kPhilox10B = 0xBB67AE85;
  constexpr int kNumRounds = 10;

  Array<uint32_t, 2> round_key;
  round_key[0] = static_cast<unsigned int>(seed);
  round_key[1] = static_cast<unsigned int>(seed >> 32);

  Array<uint32_t, 4> state;
  state[0] = static_cast<unsigned int>(offset);
  state[1] = static_cast<unsigned int>(offset >> 32);
  state[2] = static_cast<unsigned int>(subsequence);
  state[3] = static_cast<unsigned int>(subsequence >> 32);

  for (int round = 0; round < kNumRounds; ++round) {
    // Advance the key before every round except the first one.
    if (round != 0) {
      round_key[0] += kPhilox10A;
      round_key[1] += kPhilox10B;
    }
    state = single_round(state, round_key);
  }
  return state;
}
Possible Logic Change

The functions rng_uniform(), rng_uniformf(), rng_uniform_half(), and rng_uniform_bfloat() have been modified to take their rng_result parameter as a const reference to Array<uint32_t, 4> instead of a uint4. These changes may affect the logic of the functions and should be reviewed carefully.

// Builds a double from two consecutive words of `rng_result` by forwarding
// them to uniform() (defined elsewhere).
//
// `rng_component` selects the pair: words [2 * rng_component] and
// [2 * rng_component + 1] are used. No bounds check is performed here;
// presumably callers pass 0 or 1 so both indices stay inside the four words.
__device__ double rng_uniform(const Array<uint32_t, 4>& rng_result, int rng_component) {
  const int first_word = rng_component * 2;
  return uniform(rng_result[first_word], rng_result[first_word + 1]);
}

// Builds a float from one word of `rng_result` by forwarding it to
// uniformf() (defined elsewhere).
//
// `rng_component` is used directly as the word index; no bounds check is
// performed here.
__device__ float rng_uniformf(const Array<uint32_t, 4>& rng_result, int rng_component) {
  const uint32_t word = rng_result[rng_component];
  return uniformf(word);
}

// Builds a __half from one word of `rng_result` by forwarding it to
// uniform_half() (defined elsewhere).
//
// `rng_component` is used directly as the word index; no bounds check is
// performed here.
__device__ __half rng_uniform_half(const Array<uint32_t, 4>& rng_result, int rng_component) {
  const uint32_t word = rng_result[rng_component];
  return uniform_half(word);
}

// Builds a __bfloat from one word of `rng_result` by forwarding it to
// uniform_bfloat() (defined elsewhere).
//
// `rng_component` is used directly as the word index; no bounds check is
// performed here.
__device__ __bfloat
rng_uniform_bfloat(const Array<uint32_t, 4>& rng_result, int rng_component) {
  const uint32_t word = rng_result[rng_component];
  return uniform_bfloat(word);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant