-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial CUDA wavelet transform that works for a single 8x8x8 block.
- Loading branch information
0 parents
commit ce8e2e1
Showing
15 changed files
with
885 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
CC = gcc | ||
CXX = g++ | ||
NVCC = nvcc | ||
NVFLAGS=-Xptxas -v -g -use_fast_math | ||
CFLAGS=-fopenmp -O3 -fPIC -mavx -g | ||
LDFLAGS=-fopenmp -lm -lrt | ||
arch=sm_75 | ||
|
||
all: write_volume read_volume test_wavelet_transform_slow | ||
|
||
write_volume: | ||
$(CXX) $(LDFLAGS) write_volume.c -o write_volume.x | ||
|
||
read_volume: | ||
$(CXX) $(LDFLAGS) read_volume.c -o read_volume.x | ||
|
||
test_wavelet_transform_slow: | ||
$(NVCC) $(NVFLAGS) test_wavelet_transform_slow.cu -o test_wavelet_transform_slow.x | ||
|
||
|
||
%.o: %.c | ||
$(CC) -c $(CFLAGS) $*.c | ||
|
||
%.o: %.cpp | ||
$(CXX) -c $(CFLAGS) $*.cpp | ||
|
||
clean: | ||
rm -f *.o | ||
rm -f *.x | ||
rm -f *.bin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#pragma once | ||
|
||
template <typename T> | ||
int compare(T *a, T *b, int nx, int ny, int nz, int verbose=0, T tol=1e-5f) { | ||
int pass = 1; | ||
for (int z = 0; z < nz; ++z) | ||
for (int y = 0; y < ny; ++y) | ||
for (int x = 0; x < nx; ++x) { | ||
size_t pos = x + nx * y + nx * ny * z; | ||
T ax = a[pos]; | ||
T bx = b[pos]; | ||
T diff = ax > bx ? ax - bx : bx - ax; | ||
if (diff > tol) { | ||
pass = 0; | ||
if (verbose) printf("[%d %d %d] a = %f b = %f \n", x, y, z, ax, bx); | ||
} | ||
} | ||
|
||
return pass; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#pragma once | ||
|
||
template <typename T> | ||
void diff(T *out, const T *a, const T *b, int n) { | ||
for (int x = 0; x < n; ++x) { | ||
T ax = a[x]; | ||
T bx = b[x]; | ||
T diff = ax > bx ? ax - bx : bx - ax; | ||
out[x] = diff; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#pragma once | ||
#include <math.h> | ||
|
||
template <typename T> | ||
double l2norm(T *a, T *b, int n, int relative = 0) { | ||
double err=0; | ||
double norm_a = 0; | ||
for (int i = 0; i < n; i++) { | ||
err += (a[i] - b[i]) * (a[i] - b[i]); | ||
if (relative) norm_a += a[i] * a[i]; | ||
} | ||
err = sqrt(err); | ||
if (relative) err /= sqrt(norm_a); | ||
return err; | ||
} | ||
|
||
template <typename T> | ||
double l1norm(T *a, T *b, int n, int relative = 0) { | ||
double err=0; | ||
double norm_a = 0; | ||
for (int i = 0; i < n; i++) { | ||
err = a[i] - b[i] > 0 ? a[i] - b[i] : b[i] - a[i]; | ||
if (relative) norm_a += a[i] > 0 ? a[i] : - a[i]; | ||
} | ||
if (relative) err /= norm_a; | ||
return err; | ||
} | ||
|
||
template <typename T> | ||
double linfnorm(T *a, T *b, int n, int relative = 0) { | ||
double err = 0; | ||
double norm_a = 0; | ||
for (int i = 0; i < n; i++) { | ||
double diff = a[i] - b[i] > 0 ? a[i] - b[i] : b[i] - a[i]; | ||
err = err > diff ? err : diff; | ||
if (relative) { | ||
double diff_a = a[i] > 0 ? a[i] : - a[i]; | ||
norm_a = norm_a > diff_a ? norm_a : diff_a; | ||
} | ||
} | ||
if (relative) err /= norm_a; | ||
return err; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#pragma once | ||
#ifdef NVCC | ||
#define __dev __device__ | ||
#define __host __host__ | ||
#else | ||
#define __dev | ||
#define __host | ||
#endif | ||
|
||
#include <assert.h> | ||
|
||
__dev __host void print_array(float *a, int nx, int ny, int nz, int ix0=0, int iy0=0, int iz0=0, int ixn=0, int iyn=0, int izn=0) { | ||
|
||
ixn = ixn == 0 ? nx : ixn; | ||
iyn = iyn == 0 ? ny : iyn; | ||
izn = izn == 0 ? nz : izn; | ||
|
||
assert(nx <= ixn); | ||
assert(ny <= iyn); | ||
assert(nz <= izn); | ||
|
||
for (int iz = iz0; iz < izn; ++iz) { | ||
printf("iz = %d \n", iz); | ||
for (int iy = iy0; iy < iyn; ++iy) { | ||
for (int ix = ix0; ix < ixn; ++ix) { | ||
size_t pos = ix + nx * iy + nx * ny * iz; | ||
printf("%.2f ", a[pos]); | ||
} | ||
printf("\n"); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
#include "printing.cuh" | ||
#include "read_volume.h" | ||
|
||
|
||
|
||
int main(int argc, char **argv) { | ||
|
||
|
||
const char *filename = argv[1]; | ||
|
||
float *x; | ||
printf("reading: %s \n", filename); | ||
int nx, ny, nz; | ||
read_volume(filename, x, nx, ny, nz); | ||
print_array(x, nx, ny, nz); | ||
|
||
free(x); | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include <assert.h> | ||
|
||
void read_volume(const char *filename, float*& x, int& nx, int& ny, int& nz) { | ||
FILE *fh = fopen(filename, "rb"); | ||
if (!fh) { | ||
fprintf(stderr, "Unable to open: %s \n", filename); | ||
return; | ||
} | ||
int count; | ||
count = fread(&nx, sizeof(int), 1, fh); | ||
assert(count == 1); | ||
count = fread(&ny, sizeof(int), 1, fh); | ||
assert(count == 1); | ||
count = fread(&nz, sizeof(int), 1, fh); | ||
assert(count == 1); | ||
size_t n = nx * ny * nz; | ||
x = (float*)malloc(sizeof(float) * n); | ||
count = fread(x, sizeof(float), n, fh); | ||
assert(count == n); | ||
fclose(fh); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <immintrin.h> | ||
|
||
#include "printing.cuh" | ||
|
||
#include "read_volume.h" | ||
#include "write_volume.h" | ||
#include "wavelet_slow.h" | ||
|
||
int main(int argc, char **argv) { | ||
|
||
|
||
const char *filename = argv[1]; | ||
const char *outfilename = argv[2]; | ||
|
||
float *x; | ||
printf("reading: %s \n", filename); | ||
int nx = 0, ny = 0, nz = 0; | ||
read_volume(filename, x, nx, ny, nz); | ||
size_t num_bytes = sizeof(float) * nx * ny * nz * 1024; | ||
float *work = (float*)malloc(num_bytes); | ||
|
||
printf("transforming... \n"); | ||
int bx = 8; | ||
int by = 8; | ||
int bz = 8; | ||
int x0 = 0; | ||
int y0 = 0; | ||
int z0 = 0; | ||
|
||
//for (int iy = 0; iy < by; ++iy) | ||
Ds79(&x[nx*0], work, 1, bx); | ||
print_array(x, nx, 1, 1); | ||
//for (int ix = 0; ix < bx; ++ix) | ||
// Ds79(&x[ix], work, bx, by); | ||
|
||
|
||
printf("writing: %s \n", outfilename); | ||
write_volume(outfilename, x, nx, ny, 1); | ||
|
||
free(x); | ||
free(work); | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#define VERBOSE 0 | ||
#define NVCC | ||
#include "printing.cuh" | ||
|
||
#include "read_volume.h" | ||
#include "write_volume.h" | ||
#include "wavelet_slow.h" | ||
#include "wavelet_slow.cuh" | ||
#include "compare.h" | ||
#include "diff.h" | ||
#include "norms.h" | ||
|
||
const int FORWARD = 0; | ||
const int INVERSE = 1; | ||
|
||
int main(int argc, char **argv) { | ||
|
||
|
||
const char *filename = argv[1]; | ||
const char *outfilename = argv[2]; | ||
|
||
float *x, *x2; | ||
printf("reading: %s \n", filename); | ||
int nx = 0, ny = 0, nz = 0; | ||
read_volume(filename, x, nx, ny, nz); | ||
read_volume(filename, x2, nx, ny, nz); | ||
size_t num_bytes = sizeof(float) * nx * ny * nz; | ||
float *work = (float*)malloc(num_bytes); | ||
float *x_gpu = (float*)malloc(num_bytes); | ||
float *err = (float*)malloc(num_bytes); | ||
|
||
float *d_x, *d_y; | ||
cudaMalloc((void**)&d_x, num_bytes); | ||
cudaMemcpy(d_x, x, num_bytes, cudaMemcpyHostToDevice); | ||
|
||
int bx = 8; | ||
int by = 8; | ||
int bz = 8; | ||
int x0 = 0; | ||
int y0 = 0; | ||
int z0 = 0; | ||
|
||
|
||
{ | ||
printf("Computing CPU forward transform... \n"); | ||
Wavelet_Transform_Slow_Forward(x, work, bx, by, bz, x0, y0, z0, nx, ny, nz); | ||
printf("Computing CPU inverse transform... \n"); | ||
Wavelet_Transform_Slow_Inverse(x, work, bx, by, bz, x0, y0, z0, nx, ny, nz); | ||
|
||
assert(compare(x, x2, 8, 8, 8, 1)); | ||
|
||
double l2err = l2norm(x, x2, bx * by * bz); | ||
double l1err = l1norm(x, x2, bx * by * bz); | ||
double linferr = linfnorm(x, x2, bx * by * bz); | ||
printf("l2 error = %g l1 error = %g linf error = %g \n", l2err, l1err, linferr); | ||
} | ||
|
||
{ | ||
dim3 threads(32, 2, 1); | ||
dim3 blocks(1, 1, 1); | ||
printf("Computing GPU forward transform... \n"); | ||
wl79_8x8x8<FORWARD><<<blocks, threads>>>(d_x, bx, by, bz); | ||
printf("Computing GPU inverse transform... \n"); | ||
wl79_8x8x8<INVERSE><<<blocks, threads>>>(d_x, bx, by, bz); | ||
cudaDeviceSynchronize(); | ||
cudaMemcpy(x_gpu, d_x, num_bytes, cudaMemcpyDeviceToHost); | ||
assert(compare(x, x_gpu, 8, 8, 8, 1)); | ||
|
||
double l2err = l2norm(x, x_gpu, bx * by * bz); | ||
double l1err = l1norm(x, x_gpu, bx * by * bz); | ||
double linferr = linfnorm(x, x_gpu, bx * by * bz); | ||
printf("l2 error = %g l1 error = %g linf error = %g \n", l2err, l1err, linferr); | ||
} | ||
|
||
if (VERBOSE) { | ||
diff(err, x, x_gpu, 512); | ||
printf("x = \n"); | ||
print_array(x, 8, 8, 8); | ||
printf("x_gpu = \n"); | ||
print_array(x_gpu, 8, 8, 8); | ||
printf("err = \n"); | ||
print_array(err, 8, 8, 8); | ||
} | ||
|
||
printf("Test(s) passed!\n"); | ||
|
||
|
||
if (outfilename) { | ||
printf("writing: %s \n", outfilename); | ||
write_volume(outfilename, x, nx, ny, 1); | ||
} | ||
|
||
} | ||
|
Oops, something went wrong.