-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCLRadixSort.hpp
129 lines (95 loc) · 3.66 KB
/
CLRadixSort.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// C++ class for sorting integer list in OpenCL
// copyright Philippe Helluy, Université de Strasbourg, France, 2011, [email protected]
// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html
// if you find this software usefull you can cite the following work in your reports or articles:
// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011.
// http://hal.archives-ouvertes.fr/hal-00596730
// The algorithm is the radix sort algorithm
// Marcho Zagha and Guy E. Blelloch. “Radix Sort For Vector Multiprocessor.”
// in: Conference on High Performance Networking and Computing, pp. 712-721, 1991.
// each integer is made of _TOTALBITS bits. The radix is made of _BITS bits. The sort is made of
// several passes, each consisting in sorting against a group of bits corresponding to the radix.
// _TOTALBITS/_BITS passes are needed.
// The sorting parameters can be changed in "CLRadixSortParam.hpp"
// compilation for Mac:
//g++ CLRadixSort.cpp CLRadixSortMain.cpp -framework opencl -Wall
// compilation for Linux:
//g++ CLRadixSort.cpp CLRadixSortMain.cpp -lOpenCL -Wall
#ifndef _CLRADIXSORT
#define _CLRADIXSORT
#include "CLRadixSortParam.hpp"
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
typedef cl_uint uint;
#include <string>
#include<fstream>
#include<iostream>
#include<assert.h>
#include<math.h>
#include <stdlib.h>
using namespace std;
class CLRadixSort{
friend ostream &operator<<(ostream &os, CLRadixSort &r);
public:
CLRadixSort(cl_context Context,
cl_device_id NumDevice,
cl_command_queue CommandQueue);
CLRadixSort() {};
~CLRadixSort();
// this function allows to change the size of the sorted vector
void Resize(int nn);
// this function treats the array d_Keys on the GPU
// and return the sorting permutation in the array d_Permut
void Sort();
// get the data from the GPU (for debugging)
void RecupGPU(void);
// put the data on the host in the GPU
void Host2GPU(void);
// check that the sort is successfull (for debugging)
void Check(void);
// sort a set of particles (for debugging)
void PICSorting(void);
// transpose the list for faster memeory access
// (improve coalescence)
void Transpose(int nbrow,int nbcol);
// compute the histograms for one pass
void Histogram(uint pass);
// scan the histograms
void ScanHistogram(void);
// scan the histograms
void Reorder(uint pass);
cl_context Context; // OpenCL context
cl_device_id NumDevice; // OpenCL Device
cl_command_queue CommandQueue; // OpenCL command queue
cl_program Program; // OpenCL program
uint h_Histograms[_RADIX * _GROUPS * _ITEMS]; // histograms on the cpu
cl_mem d_Histograms; // histograms on the GPU
// sum of the local histograms
uint h_globsum[_HISTOSPLIT];
cl_mem d_globsum;
cl_mem d_temp; // in case where the sum is not needed
// list of keys
uint nkeys; // actual number of keys
uint nkeys_rounded; // next multiple of _ITEMS*_GROUPS
uint h_checkKeys[_N]; // a copy for check
uint h_Keys[_N];
cl_mem d_inKeys;
cl_mem d_outKeys;
// permutation
uint h_Permut[_N];
cl_mem d_inPermut;
cl_mem d_outPermut;
// OpenCL kernels
cl_kernel ckTranspose; // transpose the initial list
cl_kernel ckHistogram; // compute histograms
cl_kernel ckScanHistogram; // scan local histogram
cl_kernel ckPasteHistogram; // paste local histograms
cl_kernel ckReorder; // final reordering
// timers
float histo_time,scan_time,reorder_time,sort_time,transpose_time;
};
float corput(int n,int k1,int k2);
#endif