Skip to content

Commit

Permalink
Add initial CUDA wavelet transform that works for a single 8x8x8 block.
Browse files Browse the repository at this point in the history
  • Loading branch information
ooreilly committed Dec 7, 2020
0 parents commit ce8e2e1
Show file tree
Hide file tree
Showing 15 changed files with 885 additions and 0 deletions.
30 changes: 30 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
CC = gcc
CXX = g++
NVCC = nvcc
NVFLAGS=-Xptxas -v -g -use_fast_math
CFLAGS=-fopenmp -O3 -fPIC -mavx -g
LDFLAGS=-fopenmp -lm -lrt
arch=sm_75

all: write_volume read_volume test_wavelet_transform_slow

write_volume:
$(CXX) $(LDFLAGS) write_volume.c -o write_volume.x

read_volume:
$(CXX) $(LDFLAGS) read_volume.c -o read_volume.x

test_wavelet_transform_slow:
$(NVCC) $(NVFLAGS) test_wavelet_transform_slow.cu -o test_wavelet_transform_slow.x


%.o: %.c
$(CC) -c $(CFLAGS) $*.c

%.o: %.cpp
$(CXX) -c $(CFLAGS) $*.cpp

clean:
rm -f *.o
rm -f *.x
rm -f *.bin
20 changes: 20 additions & 0 deletions compare.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

template <typename T>
int compare(T *a, T *b, int nx, int ny, int nz, int verbose=0, T tol=1e-5f) {
int pass = 1;
for (int z = 0; z < nz; ++z)
for (int y = 0; y < ny; ++y)
for (int x = 0; x < nx; ++x) {
size_t pos = x + nx * y + nx * ny * z;
T ax = a[pos];
T bx = b[pos];
T diff = ax > bx ? ax - bx : bx - ax;
if (diff > tol) {
pass = 0;
if (verbose) printf("[%d %d %d] a = %f b = %f \n", x, y, z, ax, bx);
}
}

return pass;
}
11 changes: 11 additions & 0 deletions diff.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

template <typename T>
void diff(T *out, const T *a, const T *b, int n) {
for (int x = 0; x < n; ++x) {
T ax = a[x];
T bx = b[x];
T diff = ax > bx ? ax - bx : bx - ax;
out[x] = diff;
}
}
43 changes: 43 additions & 0 deletions norms.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once
#include <math.h>

template <typename T>
double l2norm(T *a, T *b, int n, int relative = 0) {
double err=0;
double norm_a = 0;
for (int i = 0; i < n; i++) {
err += (a[i] - b[i]) * (a[i] - b[i]);
if (relative) norm_a += a[i] * a[i];
}
err = sqrt(err);
if (relative) err /= sqrt(norm_a);
return err;
}

template <typename T>
double l1norm(T *a, T *b, int n, int relative = 0) {
double err=0;
double norm_a = 0;
for (int i = 0; i < n; i++) {
err = a[i] - b[i] > 0 ? a[i] - b[i] : b[i] - a[i];
if (relative) norm_a += a[i] > 0 ? a[i] : - a[i];
}
if (relative) err /= norm_a;
return err;
}

template <typename T>
double linfnorm(T *a, T *b, int n, int relative = 0) {
double err = 0;
double norm_a = 0;
for (int i = 0; i < n; i++) {
double diff = a[i] - b[i] > 0 ? a[i] - b[i] : b[i] - a[i];
err = err > diff ? err : diff;
if (relative) {
double diff_a = a[i] > 0 ? a[i] : - a[i];
norm_a = norm_a > diff_a ? norm_a : diff_a;
}
}
if (relative) err /= norm_a;
return err;
}
33 changes: 33 additions & 0 deletions printing.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once
#ifdef NVCC
#define __dev __device__
#define __host __host__
#else
#define __dev
#define __host
#endif

#include <assert.h>

__dev __host void print_array(float *a, int nx, int ny, int nz, int ix0=0, int iy0=0, int iz0=0, int ixn=0, int iyn=0, int izn=0) {

ixn = ixn == 0 ? nx : ixn;
iyn = iyn == 0 ? ny : iyn;
izn = izn == 0 ? nz : izn;

assert(nx <= ixn);
assert(ny <= iyn);
assert(nz <= izn);

for (int iz = iz0; iz < izn; ++iz) {
printf("iz = %d \n", iz);
for (int iy = iy0; iy < iyn; ++iy) {
for (int ix = ix0; ix < ixn; ++ix) {
size_t pos = ix + nx * iy + nx * ny * iz;
printf("%.2f ", a[pos]);
}
printf("\n");
}
}

}
23 changes: 23 additions & 0 deletions read_volume.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <stdio.h>
#include <stdlib.h>

#include "printing.cuh"
#include "read_volume.h"



int main(int argc, char **argv) {


const char *filename = argv[1];

float *x;
printf("reading: %s \n", filename);
int nx, ny, nz;
read_volume(filename, x, nx, ny, nz);
print_array(x, nx, ny, nz);

free(x);

}

24 changes: 24 additions & 0 deletions read_volume.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

void read_volume(const char *filename, float*& x, int& nx, int& ny, int& nz) {
FILE *fh = fopen(filename, "rb");
if (!fh) {
fprintf(stderr, "Unable to open: %s \n", filename);
return;
}
int count;
count = fread(&nx, sizeof(int), 1, fh);
assert(count == 1);
count = fread(&ny, sizeof(int), 1, fh);
assert(count == 1);
count = fread(&nz, sizeof(int), 1, fh);
assert(count == 1);
size_t n = nx * ny * nz;
x = (float*)malloc(sizeof(float) * n);
count = fread(x, sizeof(float), n, fh);
assert(count == n);
fclose(fh);
}

47 changes: 47 additions & 0 deletions run_slow_transform.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>

#include "printing.cuh"

#include "read_volume.h"
#include "write_volume.h"
#include "wavelet_slow.h"

int main(int argc, char **argv) {


const char *filename = argv[1];
const char *outfilename = argv[2];

float *x;
printf("reading: %s \n", filename);
int nx = 0, ny = 0, nz = 0;
read_volume(filename, x, nx, ny, nz);
size_t num_bytes = sizeof(float) * nx * ny * nz * 1024;
float *work = (float*)malloc(num_bytes);

printf("transforming... \n");
int bx = 8;
int by = 8;
int bz = 8;
int x0 = 0;
int y0 = 0;
int z0 = 0;

//for (int iy = 0; iy < by; ++iy)
Ds79(&x[nx*0], work, 1, bx);
print_array(x, nx, 1, 1);
//for (int ix = 0; ix < bx; ++ix)
// Ds79(&x[ix], work, bx, by);


printf("writing: %s \n", outfilename);
write_volume(outfilename, x, nx, ny, 1);

free(x);
free(work);

}

98 changes: 98 additions & 0 deletions test_wavelet_transform_slow.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VERBOSE 0
#define NVCC
#include "printing.cuh"

#include "read_volume.h"
#include "write_volume.h"
#include "wavelet_slow.h"
#include "wavelet_slow.cuh"
#include "compare.h"
#include "diff.h"
#include "norms.h"

const int FORWARD = 0;
const int INVERSE = 1;

int main(int argc, char **argv) {


const char *filename = argv[1];
const char *outfilename = argv[2];

float *x, *x2;
printf("reading: %s \n", filename);
int nx = 0, ny = 0, nz = 0;
read_volume(filename, x, nx, ny, nz);
read_volume(filename, x2, nx, ny, nz);
size_t num_bytes = sizeof(float) * nx * ny * nz;
float *work = (float*)malloc(num_bytes);
float *x_gpu = (float*)malloc(num_bytes);
float *err = (float*)malloc(num_bytes);

float *d_x, *d_y;
cudaMalloc((void**)&d_x, num_bytes);
cudaMemcpy(d_x, x, num_bytes, cudaMemcpyHostToDevice);

int bx = 8;
int by = 8;
int bz = 8;
int x0 = 0;
int y0 = 0;
int z0 = 0;


{
printf("Computing CPU forward transform... \n");
Wavelet_Transform_Slow_Forward(x, work, bx, by, bz, x0, y0, z0, nx, ny, nz);
printf("Computing CPU inverse transform... \n");
Wavelet_Transform_Slow_Inverse(x, work, bx, by, bz, x0, y0, z0, nx, ny, nz);

assert(compare(x, x2, 8, 8, 8, 1));

double l2err = l2norm(x, x2, bx * by * bz);
double l1err = l1norm(x, x2, bx * by * bz);
double linferr = linfnorm(x, x2, bx * by * bz);
printf("l2 error = %g l1 error = %g linf error = %g \n", l2err, l1err, linferr);
}

{
dim3 threads(32, 2, 1);
dim3 blocks(1, 1, 1);
printf("Computing GPU forward transform... \n");
wl79_8x8x8<FORWARD><<<blocks, threads>>>(d_x, bx, by, bz);
printf("Computing GPU inverse transform... \n");
wl79_8x8x8<INVERSE><<<blocks, threads>>>(d_x, bx, by, bz);
cudaDeviceSynchronize();
cudaMemcpy(x_gpu, d_x, num_bytes, cudaMemcpyDeviceToHost);
assert(compare(x, x_gpu, 8, 8, 8, 1));

double l2err = l2norm(x, x_gpu, bx * by * bz);
double l1err = l1norm(x, x_gpu, bx * by * bz);
double linferr = linfnorm(x, x_gpu, bx * by * bz);
printf("l2 error = %g l1 error = %g linf error = %g \n", l2err, l1err, linferr);
}

if (VERBOSE) {
diff(err, x, x_gpu, 512);
printf("x = \n");
print_array(x, 8, 8, 8);
printf("x_gpu = \n");
print_array(x_gpu, 8, 8, 8);
printf("err = \n");
print_array(err, 8, 8, 8);
}

printf("Test(s) passed!\n");


if (outfilename) {
printf("writing: %s \n", outfilename);
write_volume(outfilename, x, nx, ny, 1);
}

}

Loading

0 comments on commit ce8e2e1

Please sign in to comment.