Skip to content

Commit

Permalink
Merge pull request #169 from AdaptiveParticles/developKrzysztof
Browse files Browse the repository at this point in the history
Few fixes for CUDA
  • Loading branch information
krzysg authored Apr 21, 2022
2 parents 4fec777 + 9f7029a commit 4244642
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 200 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ cmake-build-debug/
cmake-build-release/
cmake-build*
xcode*
build-cmake-debug/
build-cmake-release/
#####
# Xcode private settings (window sizes, bookmarks, breakpoints, custom executables, smart groups)
#
Expand Down
187 changes: 110 additions & 77 deletions src/algorithm/APRConverter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,38 @@ class APRConverter {
APRTimer computation_timer;
APRParameters par;

template<typename T>
template <typename T>
bool get_apr(APR &aAPR, PixelData<T> &input_image);

template <typename T>
bool get_apr_cpu(APR &aAPR, PixelData<T> &input_image);

#ifdef APR_USE_CUDA
template <typename T>
bool get_apr_cuda(APR &aAPR, PixelData<T> &input_image);
#endif

bool verbose = true;

void get_apr_custom_grad_scale(APR& aAPR,PixelData<ImageType>& grad,PixelData<float>& lis,bool down_sampled = true);

void initPipelineAPR(APR &aAPR, int y_num, int x_num = 1, int z_num = 1){
//
// Initializes the APR datastructures for the given image.
//
template <typename T>
bool initPipelineAPR(APR &aAPR, PixelData<T> &input_image) {

if (par.check_input) {
if (!check_input_dimensions(input_image)) {
std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
return false;
}
}

aAPR.aprInfo.init(y_num,x_num,z_num);
// Initializes the APR datastructures for the given image.
aAPR.parameters = par;
aAPR.aprInfo.init(input_image.y_num,input_image.x_num,input_image.z_num);
aAPR.linearAccess.genInfo = &aAPR.aprInfo;
aAPR.apr_access.genInfo = &aAPR.aprInfo;

return true;
}

protected:
Expand Down Expand Up @@ -121,7 +138,6 @@ class APRConverter {

void initPipelineMemory(int y_num,int x_num = 1,int z_num = 1);


};


Expand Down Expand Up @@ -423,71 +439,16 @@ inline bool APRConverter<ImageType>::get_ds(APR &aAPR) {
}


#ifdef APR_USE_CUDA
/**
* Main method for constructing the APR from an input image
* Implementation of pipeline for GPU/CUDA
*
* @param aAPR - the APR datastructure
* @param input_image - input image
*/
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_image) {

aAPR.parameters = par;

if(par.check_input) {
if(!check_input_dimensions(input_image)) {
std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
return false;
}
}


initPipelineAPR(aAPR, input_image.y_num, input_image.x_num, input_image.z_num);

#ifndef APR_USE_CUDA

total_timer.start_timer("full_pipeline");

computation_timer.start_timer("init_mem");

initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);

computation_timer.stop_timer();


computation_timer.start_timer("compute_L");

//Compute the local resolution estimate
computeL(aAPR,input_image);

computation_timer.stop_timer();

computation_timer.start_timer("apply_parameters");

if( par.auto_parameters ) {
method_timer.start_timer("autoParameters");
// autoParameters(local_scale_temp,grad_temp);
autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
aAPR.parameters = par;
method_timer.stop_timer();
}

applyParameters(aAPR,par);

computation_timer.stop_timer();

computation_timer.start_timer("solve_for_apr");

solveForAPR(aAPR);

computation_timer.stop_timer();

computation_timer.start_timer("generate_data_structures");

generateDatastructures(aAPR);

computation_timer.stop_timer();

total_timer.stop_timer();

#else
inline bool APRConverter<ImageType>::get_apr_cuda(APR &aAPR, PixelData<T>& input_image) {
if (!initPipelineAPR(aAPR, input_image)) return false;


initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
Expand All @@ -501,19 +462,16 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
computation_timer.start_timer("init_mem");
PixelData<ImageType> image_temp(input_image, false /* don't copy */, true /* pinned memory */); // global image variable useful for passing between methods, or re-using memory (should be the only full sized copy of the image)


/////////////////////////////////
/// Pipeline
////////////////////////


//offset image by factor (this is required if there are zero areas in the background with uint16_t and uint8_t images, as the Bspline co-efficients otherwise may be negative!)
// Warning both of these could result in over-flow (if your image is non zero, with a 'buffer' and has intensities up to uint16_t maximum value then set image_type = "", i.e. uncomment the following line)

if (std::is_same<uint16_t, ImageType>::value) {
bspline_offset = 100;
image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
} else if (std::is_same<uint8_t, ImageType>::value){
} else if (std::is_same<uint8_t, ImageType>::value) {
bspline_offset = 5;
image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
} else {
Expand Down Expand Up @@ -561,13 +519,13 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
PixelData<float> lst(local_scale_temp, true);

#ifdef HAVE_LIBTIFF
if(par.output_steps){
if (par.output_steps){
TiffUtils::saveMeshAsTiff(par.output_dir + "local_intensity_scale_step.tif", lst);
}
#endif

#ifdef HAVE_LIBTIFF
if(par.output_steps){
if (par.output_steps){
TiffUtils::saveMeshAsTiff(par.output_dir + "gradient_step.tif", grad_temp);
}
#endif
Expand All @@ -581,19 +539,94 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
computation_timer.start_timer("generate_data_structures");
generateDatastructures(aAPR);
computation_timer.stop_timer();


}
std::cout << "Total n ENDED" << std::endl;

}
t.stop_timer();
method_timer.stop_timer();

return true;
}
#endif


/**
 * CPU implementation of the APR construction pipeline.
 *
 * Runs the stages in a fixed order: pipeline memory initialisation, local
 * resolution estimate (computeL), parameter application (optionally preceded
 * by automatic parameter estimation), solving for the APR and generating the
 * data structures. Every stage is timed with computation_timer and the whole
 * run is timed with total_timer.
 *
 * @param aAPR - the APR datastructure that is initialised and filled
 * @param input_image - input image
 * @return false if initPipelineAPR() rejects the input, true otherwise
 */
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr_cpu(APR &aAPR, PixelData<T> &input_image) {
    // Brackets a single pipeline stage with a computation_timer start/stop pair.
    auto timedStage = [this](const char *label, auto &&stage) {
        this->computation_timer.start_timer(label);
        stage();
        this->computation_timer.stop_timer();
    };

    // Nothing is timed and nothing else is touched when initialisation fails.
    const bool initialised = initPipelineAPR(aAPR, input_image);
    if (!initialised) {
        return false;
    }

    total_timer.start_timer("full_pipeline");

    timedStage("init_mem", [&] {
        initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
    });

    // Local resolution estimate.
    timedStage("compute_L", [&] {
        computeL(aAPR, input_image);
    });

    timedStage("apply_parameters", [&] {
        if (par.auto_parameters) {
            method_timer.start_timer("autoParameters");
            // The entropy-based estimator is used here; the older
            // autoParameters(local_scale_temp, grad_temp) call is disabled.
            autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
            // Store the current converter parameters in the APR again after the estimation step.
            aAPR.parameters = par;
            method_timer.stop_timer();
        }
        applyParameters(aAPR, par);
    });

    timedStage("solve_for_apr", [&] {
        solveForAPR(aAPR);
    });

    timedStage("generate_data_structures", [&] {
        generateDatastructures(aAPR);
    });

    total_timer.stop_timer();

    return true;
}


/**
* Main method for constructing the APR from an input image.
*
* Selects the pipeline implementation at compile time. With the current
* "#if true" guard the CPU pipeline (get_apr_cpu) is always chosen, and the
* get_apr_cuda branch is never compiled, regardless of APR_USE_CUDA.
*
* @param aAPR - the APR data structure
* @param input_image - input image
* @return the result of the selected pipeline implementation
*/
template<typename ImageType> template<typename T>
inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T> &input_image) {
// TODO: The CUDA pipeline is temporarily turned off, so the CPU version is always chosen.
// Once the CUDA pipeline has been revised, remove the "#if true // " prefix below so that
// the guard reads "#ifndef APR_USE_CUDA" again.
#if true // #ifndef APR_USE_CUDA
return get_apr_cpu(aAPR, input_image);
#else
return get_apr_cuda(aAPR, input_image);
#endif
}


template<typename T>
void compute_means(const std::vector<T>& data, float threshold, float& mean_back, float& mean_fore) {
float sum_fore=0.f, sum_back=0.f;
Expand Down
1 change: 0 additions & 1 deletion src/algorithm/APRParameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class APRParameters {

// additional pipeline parameters
bool reflect_bc_lis = true;
int extra_smooth = 0;
bool check_input = false;
bool swap_dimensions = false;
bool neighborhood_optimization = true;
Expand Down
13 changes: 0 additions & 13 deletions src/algorithm/LocalIntensityScale.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,6 @@ void get_local_intensity_scale(PixelData<float> &local_scale_temp, PixelData<flo
calc_sat_mean_z(local_scale_temp, win_z2);
}

// second average for extra smoothing
if(par.extra_smooth) {
if (active_y) {
calc_sat_mean_y(local_scale_temp, par.extra_smooth);
}
if (active_x) {
calc_sat_mean_x(local_scale_temp, par.extra_smooth);
}
if (active_z) {
calc_sat_mean_z(local_scale_temp, par.extra_smooth);
}
}

rescale_var(local_scale_temp, var_rescale);
timer.stop_timer();

Expand Down
4 changes: 4 additions & 0 deletions src/data_structures/APR/access/GPUAccess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ template class ParticleDataGpu<float>;
template class ParticleDataGpu<double>;
template class ParticleDataGpu<int>;
template class ParticleDataGpu<uint64_t>;
template class ParticleDataGpu<uint32_t>;
template class ParticleDataGpu<int8_t>;
template class ParticleDataGpu<int16_t>;
template class ParticleDataGpu<int64_t>;

__global__ void fill_y_vec_max_level(const uint64_t* level_xz_vec,
const uint64_t* xz_end_vec,
Expand Down
2 changes: 1 addition & 1 deletion test/APRTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2879,7 +2879,7 @@ bool test_pipeline_u16(TestData& test_data){
}

APR apr_c;
aprConverter.initPipelineAPR(apr_c, test_data.img_original.y_num, test_data.img_original.x_num, test_data.img_original.z_num);
aprConverter.initPipelineAPR(apr_c, test_data.img_original);

aprConverter.get_apr_custom_grad_scale(apr_c,gradient_saved,scale_saved);

Expand Down
Loading

0 comments on commit 4244642

Please sign in to comment.