Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Few fixes for CUDA #169

Merged
merged 5 commits into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ cmake-build-debug/
cmake-build-release/
cmake-build*
xcode*
build-cmake-debug/
build-cmake-release/
#####
# Xcode private settings (window sizes, bookmarks, breakpoints, custom executables, smart groups)
#
Expand Down
187 changes: 110 additions & 77 deletions src/algorithm/APRConverter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,38 @@ class APRConverter {
APRTimer computation_timer;
APRParameters par;

template<typename T>
template <typename T>
bool get_apr(APR &aAPR, PixelData<T> &input_image);

template <typename T>
bool get_apr_cpu(APR &aAPR, PixelData<T> &input_image);

#ifdef APR_USE_CUDA
template <typename T>
bool get_apr_cuda(APR &aAPR, PixelData<T> &input_image);
#endif

bool verbose = true;

void get_apr_custom_grad_scale(APR& aAPR,PixelData<ImageType>& grad,PixelData<float>& lis,bool down_sampled = true);

void initPipelineAPR(APR &aAPR, int y_num, int x_num = 1, int z_num = 1){
//
// Initializes the APR datastructures for the given image.
//
template <typename T>
bool initPipelineAPR(APR &aAPR, PixelData<T> &input_image) {

if (par.check_input) {
if (!check_input_dimensions(input_image)) {
std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
return false;
}
}

aAPR.aprInfo.init(y_num,x_num,z_num);
// Initializes the APR datastructures for the given image.
aAPR.parameters = par;
aAPR.aprInfo.init(input_image.y_num,input_image.x_num,input_image.z_num);
aAPR.linearAccess.genInfo = &aAPR.aprInfo;
aAPR.apr_access.genInfo = &aAPR.aprInfo;

return true;
}

protected:
Expand Down Expand Up @@ -121,7 +138,6 @@ class APRConverter {

void initPipelineMemory(int y_num,int x_num = 1,int z_num = 1);


};


Expand Down Expand Up @@ -423,71 +439,16 @@ inline bool APRConverter<ImageType>::get_ds(APR &aAPR) {
}


#ifdef APR_USE_CUDA
/**
* Main method for constructing the APR from an input image
* Implementation of pipeline for GPU/CUDA
*
* @param aAPR - the APR datastructure
* @param input_image - input image
*/
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_image) {

aAPR.parameters = par;

if(par.check_input) {
if(!check_input_dimensions(input_image)) {
std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
return false;
}
}


initPipelineAPR(aAPR, input_image.y_num, input_image.x_num, input_image.z_num);

#ifndef APR_USE_CUDA

total_timer.start_timer("full_pipeline");

computation_timer.start_timer("init_mem");

initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);

computation_timer.stop_timer();


computation_timer.start_timer("compute_L");

//Compute the local resolution estimate
computeL(aAPR,input_image);

computation_timer.stop_timer();

computation_timer.start_timer("apply_parameters");

if( par.auto_parameters ) {
method_timer.start_timer("autoParameters");
// autoParameters(local_scale_temp,grad_temp);
autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
aAPR.parameters = par;
method_timer.stop_timer();
}

applyParameters(aAPR,par);

computation_timer.stop_timer();

computation_timer.start_timer("solve_for_apr");

solveForAPR(aAPR);

computation_timer.stop_timer();

computation_timer.start_timer("generate_data_structures");

generateDatastructures(aAPR);

computation_timer.stop_timer();

total_timer.stop_timer();

#else
inline bool APRConverter<ImageType>::get_apr_cuda(APR &aAPR, PixelData<T>& input_image) {
if (!initPipelineAPR(aAPR, input_image)) return false;


initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
Expand All @@ -501,19 +462,16 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
computation_timer.start_timer("init_mem");
PixelData<ImageType> image_temp(input_image, false /* don't copy */, true /* pinned memory */); // global image variable useful for passing between methods, or re-using memory (should be the only full sized copy of the image)


/////////////////////////////////
/// Pipeline
////////////////////////


//offset image by factor (this is required if there are zero areas in the background with uint16_t and uint8_t images, as the Bspline co-efficients otherwise may be negative!)
// Warning both of these could result in over-flow (if your image is non zero, with a 'buffer' and has intensities up to uint16_t maximum value then set image_type = "", i.e. uncomment the following line)

if (std::is_same<uint16_t, ImageType>::value) {
bspline_offset = 100;
image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
} else if (std::is_same<uint8_t, ImageType>::value){
} else if (std::is_same<uint8_t, ImageType>::value) {
bspline_offset = 5;
image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
} else {
Expand Down Expand Up @@ -561,13 +519,13 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
PixelData<float> lst(local_scale_temp, true);

#ifdef HAVE_LIBTIFF
if(par.output_steps){
if (par.output_steps){
TiffUtils::saveMeshAsTiff(par.output_dir + "local_intensity_scale_step.tif", lst);
}
#endif

#ifdef HAVE_LIBTIFF
if(par.output_steps){
if (par.output_steps){
TiffUtils::saveMeshAsTiff(par.output_dir + "gradient_step.tif", grad_temp);
}
#endif
Expand All @@ -581,19 +539,94 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
computation_timer.start_timer("generate_data_structures");
generateDatastructures(aAPR);
computation_timer.stop_timer();


}
std::cout << "Total n ENDED" << std::endl;

}
t.stop_timer();
method_timer.stop_timer();

return true;
}
#endif


/**
 * CPU implementation of the APR construction pipeline.
 *
 * Stages, in order: pipeline memory initialisation, computeL (local resolution
 * estimate), parameter application (preceded by autoParametersLiEntropy when
 * par.auto_parameters is set), solveForAPR and generateDatastructures.
 * Every stage is timed with computation_timer; the whole run is timed with total_timer.
 *
 * @param aAPR - the APR datastructure
 * @param input_image - input image
 * @return false when initPipelineAPR fails, true otherwise
 */
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr_cpu(APR &aAPR, PixelData<T> &input_image) {

    const bool initialised = initPipelineAPR(aAPR, input_image);
    if (!initialised) {
        return false;
    }

    // Runs a single pipeline stage between a start/stop pair of computation_timer.
    auto timed_stage = [this](const char *stage_name, auto &&stage) {
        this->computation_timer.start_timer(stage_name);
        stage();
        this->computation_timer.stop_timer();
    };

    total_timer.start_timer("full_pipeline");

    timed_stage("init_mem", [&]() {
        initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
    });

    // Local resolution estimate
    timed_stage("compute_L", [&]() {
        computeL(aAPR, input_image);
    });

    timed_stage("apply_parameters", [&]() {
        if (par.auto_parameters) {
            method_timer.start_timer("autoParameters");
            // The older autoParameters(local_scale_temp, grad_temp) variant stays disabled.
            autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
            aAPR.parameters = par;
            method_timer.stop_timer();
        }
        applyParameters(aAPR, par);
    });

    timed_stage("solve_for_apr", [&]() {
        solveForAPR(aAPR);
    });

    timed_stage("generate_data_structures", [&]() {
        generateDatastructures(aAPR);
    });

    total_timer.stop_timer();

    return true;
}


/**
 * Main method for constructing the APR from an input image.
 *
 * Forwards to a pipeline implementation selected at compile time. The
 * preprocessor condition below is currently hard-wired to true, so
 * get_apr_cpu() is always used, whether or not APR_USE_CUDA is defined.
 *
 * @param aAPR - the APR data structure
 * @param input_image - input image
 * @return the value returned by the selected pipeline implementation
 */
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T> &input_image) {
// TODO: The CUDA pipeline is temporarily turned off, so the CPU version is always chosen.
// Once the CUDA pipeline has been revised, remove the "#if true // " prefix below so that
// the condition becomes "#ifndef APR_USE_CUDA" again.
#if true // #ifndef APR_USE_CUDA
return get_apr_cpu(aAPR, input_image);
#else
return get_apr_cuda(aAPR, input_image);
#endif
}


template<typename T>
void compute_means(const std::vector<T>& data, float threshold, float& mean_back, float& mean_fore) {
float sum_fore=0.f, sum_back=0.f;
Expand Down
1 change: 0 additions & 1 deletion src/algorithm/APRParameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class APRParameters {

// additional pipeline parameters
bool reflect_bc_lis = true;
int extra_smooth = 0;
bool check_input = false;
bool swap_dimensions = false;
bool neighborhood_optimization = true;
Expand Down
13 changes: 0 additions & 13 deletions src/algorithm/LocalIntensityScale.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,6 @@ void get_local_intensity_scale(PixelData<float> &local_scale_temp, PixelData<flo
calc_sat_mean_z(local_scale_temp, win_z2);
}

// second average for extra smoothing
if(par.extra_smooth) {
if (active_y) {
calc_sat_mean_y(local_scale_temp, par.extra_smooth);
}
if (active_x) {
calc_sat_mean_x(local_scale_temp, par.extra_smooth);
}
if (active_z) {
calc_sat_mean_z(local_scale_temp, par.extra_smooth);
}
}

rescale_var(local_scale_temp, var_rescale);
timer.stop_timer();

Expand Down
4 changes: 4 additions & 0 deletions src/data_structures/APR/access/GPUAccess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ template class ParticleDataGpu<float>;
template class ParticleDataGpu<double>;
template class ParticleDataGpu<int>;
template class ParticleDataGpu<uint64_t>;
template class ParticleDataGpu<uint32_t>;
template class ParticleDataGpu<int8_t>;
template class ParticleDataGpu<int16_t>;
template class ParticleDataGpu<int64_t>;

__global__ void fill_y_vec_max_level(const uint64_t* level_xz_vec,
const uint64_t* xz_end_vec,
Expand Down
2 changes: 1 addition & 1 deletion test/APRTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2879,7 +2879,7 @@ bool test_pipeline_u16(TestData& test_data){
}

APR apr_c;
aprConverter.initPipelineAPR(apr_c, test_data.img_original.y_num, test_data.img_original.x_num, test_data.img_original.z_num);
aprConverter.initPipelineAPR(apr_c, test_data.img_original);

aprConverter.get_apr_custom_grad_scale(apr_c,gradient_saved,scale_saved);

Expand Down
Loading