-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathfind_E_grid_index.cu
99 lines (78 loc) · 3.04 KB
/
find_E_grid_index.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <cuda.h>
#include <stdio.h>
#include "datadef.h"
__global__ void find_E_grid_index_kernel(unsigned N, cross_section_data* d_xsdata , unsigned* remap, float* E , unsigned * index, unsigned* rxn){
int tid_in = threadIdx.x+blockIdx.x*blockDim.x;
if (tid_in >= N){return;}
// declare shared variables
__shared__ unsigned energy_grid_len;
__shared__ float* energy_grid;
// have thread 0 of block copy all pointers and static info into shared memory
if (threadIdx.x == 0){
energy_grid_len = d_xsdata[0].energy_grid_len;
energy_grid = d_xsdata[0].energy_grid;
}
// make sure shared loads happen before anything else
__syncthreads();
// return if terminated
unsigned this_rxn=rxn[tid_in];
if (this_rxn>=900){return;}
// remap
int tid=remap[tid_in];
// load data
float value = E[tid];
// init local
unsigned cnt = 0;
unsigned powtwo = 2;
int dex = (energy_grid_len-1) / 2; // N_energies starts at 1, duh
// check edges
float grid_max = energy_grid[energy_grid_len-1];
float grid_min = energy_grid[0];
if (value < grid_min){
dex=-2; // encoding for under min interpolation (2^32-2, or -2)
}
else if (value > grid_max){
dex=-1; // encoding for over max interpolation (2^32-1, or -1)
}
else{
// do the search
for(cnt=0;cnt<=30;cnt++){
powtwo = powtwo * 2;
if ( energy_grid[dex] <= value &&
energy_grid[dex+1] > value ) { break; }
else if ( energy_grid[dex] > value ) { dex = dex - ((energy_grid_len / powtwo) + 1) ;} // +1's are to do a ceiling instead of a floor on integer division
else if ( energy_grid[dex] < value ) { dex = dex + ((energy_grid_len / powtwo) + 1) ;}
if(cnt==30){
printf("binary search iteration overflow! %p len %d val % 6.4E tid=%u rxn=%u\n",energy_grid,energy_grid_len,value,tid,this_rxn);
dex=0;
}
// edge checks... fix later???
if(dex<0){
dex=0;
}
if(dex>=energy_grid_len){
dex=energy_grid_len-1;
}
}
}
//write output index
index[tid]=dex;
//printf("rxn %u, remap[%i]=%u E %6.4E dex %u\n",this_rxn,tid_in,tid, value, dex);
}
/**
* \brief a
* \details b
*
* @param[in] NUM_THREADS - the number of threads to run per thread block
* @param[in] N - the total number of threads to launch on the grid
* @param[in] d_xsdata - device pointer to cross section data pointer array
* @param[in] d_remap - device pointer to data remapping vector
* @param[in] d_E - device pointer to energy data array
* @param[in] d_index - device pointer to index array (stores the unionized grid index of the current energy)
* @param[in] d_rxn - device pointer of the reaction number array
*/
void find_E_grid_index(unsigned NUM_THREADS, unsigned N, cross_section_data* d_xsdata, unsigned* d_remap, float* d_E , unsigned * d_index , unsigned* d_rxn){
unsigned blks = ( N + NUM_THREADS - 1 ) / NUM_THREADS;
find_E_grid_index_kernel <<< blks, NUM_THREADS >>> ( N, d_xsdata, d_remap, d_E , d_index , d_rxn);
cudaThreadSynchronize();
}