Merge pull request #169 from AdaptiveParticles/developKrzysztof

Few fixes for CUDA
AdaptiveParticles · Apr 21, 2022 · 4244642 · 4244642
2 parents 4fec777 + 9f7029a
commit 4244642
Show file tree

Hide file tree

Showing 7 changed files with 227 additions and 200 deletions.
diff --git a/.gitignore b/.gitignore
@@ -217,6 +217,8 @@ cmake-build-debug/
 cmake-build-release/
 cmake-build*
 xcode*
+build-cmake-debug/
+build-cmake-release/
 #####
 # Xcode private settings (window sizes, bookmarks, breakpoints, custom executables, smart groups)
 #

diff --git a/src/algorithm/APRConverter.hpp b/src/algorithm/APRConverter.hpp
@@ -67,21 +67,38 @@ class APRConverter {
     APRTimer computation_timer;
     APRParameters par;
 
-    template<typename T>
+    template <typename T>
     bool get_apr(APR &aAPR, PixelData<T> &input_image);
 
+    template <typename T>
+    bool get_apr_cpu(APR &aAPR, PixelData<T> &input_image);
+
+#ifdef APR_USE_CUDA
+    template <typename T>
+    bool get_apr_cuda(APR &aAPR, PixelData<T> &input_image);
+#endif
+
     bool verbose = true;
 
     void get_apr_custom_grad_scale(APR& aAPR,PixelData<ImageType>& grad,PixelData<float>& lis,bool down_sampled = true);
 
-    void initPipelineAPR(APR &aAPR, int y_num, int x_num = 1, int z_num = 1){
-        //
-        //  Initializes the APR datastructures for the given image.
-        //
+    template <typename T>
+    bool initPipelineAPR(APR &aAPR, PixelData<T> &input_image) {
+
+        if (par.check_input) {
+            if (!check_input_dimensions(input_image)) {
+                std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
+                return false;
+            }
+        }
 
-        aAPR.aprInfo.init(y_num,x_num,z_num);
+        //  Initializes the APR datastructures for the given image.
+        aAPR.parameters = par;
+        aAPR.aprInfo.init(input_image.y_num,input_image.x_num,input_image.z_num);
         aAPR.linearAccess.genInfo = &aAPR.aprInfo;
         aAPR.apr_access.genInfo = &aAPR.aprInfo;
+
+        return true;
     }
 
 protected:
@@ -121,7 +138,6 @@ class APRConverter {
 
     void initPipelineMemory(int y_num,int x_num = 1,int z_num = 1);
 
-
 };
 
 
@@ -423,71 +439,16 @@ inline bool APRConverter<ImageType>::get_ds(APR &aAPR) {
 }
 
 
+#ifdef APR_USE_CUDA
 /**
- * Main method for constructing the APR from an input image
+ * Implementation of pipeline for GPU/CUDA
+ *
+ * @param aAPR - the APR datastructure
+ * @param input_image - input image
  */
 template<typename ImageType> template<typename T>
-inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_image) {
-
-    aAPR.parameters = par;
-
-    if(par.check_input) {
-        if(!check_input_dimensions(input_image)) {
-            std::cout << "Input dimension check failed. Make sure the input image is filled in order x -> y -> z, or try using the option -swap_dimension" << std::endl;
-            return false;
-        }
-    }
-
-
-    initPipelineAPR(aAPR, input_image.y_num, input_image.x_num, input_image.z_num);
-
-#ifndef APR_USE_CUDA
-
-    total_timer.start_timer("full_pipeline");
-
-    computation_timer.start_timer("init_mem");
-
-    initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
-
-    computation_timer.stop_timer();
-
-
-    computation_timer.start_timer("compute_L");
-
-    //Compute the local resolution estimate
-    computeL(aAPR,input_image);
-
-    computation_timer.stop_timer();
-
-    computation_timer.start_timer("apply_parameters");
-
-    if( par.auto_parameters ) {
-        method_timer.start_timer("autoParameters");
-//        autoParameters(local_scale_temp,grad_temp);
-        autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
-        aAPR.parameters = par;
-        method_timer.stop_timer();
-    }
-
-    applyParameters(aAPR,par);
-
-    computation_timer.stop_timer();
-
-    computation_timer.start_timer("solve_for_apr");
-
-    solveForAPR(aAPR);
-
-    computation_timer.stop_timer();
-
-    computation_timer.start_timer("generate_data_structures");
-
-    generateDatastructures(aAPR);
-
-    computation_timer.stop_timer();
-
-    total_timer.stop_timer();
-
-#else
+inline bool APRConverter<ImageType>::get_apr_cuda(APR &aAPR, PixelData<T>& input_image) {
+    if (!initPipelineAPR(aAPR, input_image)) return false;
 
 
     initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
@@ -501,19 +462,16 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
         computation_timer.start_timer("init_mem");
         PixelData<ImageType> image_temp(input_image, false /* don't copy */, true /* pinned memory */); // global image variable useful for passing between methods, or re-using memory (should be the only full sized copy of the image)
 
-
         /////////////////////////////////
         /// Pipeline
         ////////////////////////
-
-
         //offset image by factor (this is required if there are zero areas in the background with uint16_t and uint8_t images, as the Bspline co-efficients otherwise may be negative!)
         // Warning both of these could result in over-flow (if your image is non zero, with a 'buffer' and has intensities up to uint16_t maximum value then set image_type = "", i.e. uncomment the following line)
 
         if (std::is_same<uint16_t, ImageType>::value) {
             bspline_offset = 100;
             image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
-        } else if (std::is_same<uint8_t, ImageType>::value){
+        } else if (std::is_same<uint8_t, ImageType>::value) {
             bspline_offset = 5;
             image_temp.copyFromMeshWithUnaryOp(input_image, [=](const auto &a) { return (a + bspline_offset); });
         } else {
@@ -561,13 +519,13 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
             PixelData<float> lst(local_scale_temp, true);
 
 #ifdef HAVE_LIBTIFF
-            if(par.output_steps){
+            if (par.output_steps){
                 TiffUtils::saveMeshAsTiff(par.output_dir + "local_intensity_scale_step.tif", lst);
             }
 #endif
 
 #ifdef HAVE_LIBTIFF
-            if(par.output_steps){
+            if (par.output_steps){
                 TiffUtils::saveMeshAsTiff(par.output_dir + "gradient_step.tif", grad_temp);
             }
 #endif
@@ -581,19 +539,94 @@ inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T>& input_imag
             computation_timer.start_timer("generate_data_structures");
             generateDatastructures(aAPR);
             computation_timer.stop_timer();
-
-
         }
         std::cout << "Total n ENDED" << std::endl;
 
     }
     t.stop_timer();
     method_timer.stop_timer();
+
+    return true;
+}
 #endif
 
+
+/**
+ * Implementation of pipeline for CPU
+ *
+ * @param aAPR - the APR datastructure
+ * @param input_image - input image
+ */
+template<typename ImageType> template<typename T>
+inline bool APRConverter<ImageType>::get_apr_cpu(APR &aAPR, PixelData<T> &input_image) {
+
+    if (!initPipelineAPR(aAPR, input_image)) return false;
+
+    total_timer.start_timer("full_pipeline");
+
+    computation_timer.start_timer("init_mem");
+
+    initPipelineMemory(input_image.y_num, input_image.x_num, input_image.z_num);
+
+    computation_timer.stop_timer();
+
+    computation_timer.start_timer("compute_L");
+
+    //Compute the local resolution estimate
+    computeL(aAPR,input_image);
+
+    computation_timer.stop_timer();
+
+    computation_timer.start_timer("apply_parameters");
+
+    if (par.auto_parameters) {
+        method_timer.start_timer("autoParameters");
+//        autoParameters(local_scale_temp,grad_temp);
+        autoParametersLiEntropy(local_scale_temp2, local_scale_temp, grad_temp);
+        aAPR.parameters = par;
+        method_timer.stop_timer();
+    }
+
+    applyParameters(aAPR,par);
+
+    computation_timer.stop_timer();
+
+    computation_timer.start_timer("solve_for_apr");
+
+    solveForAPR(aAPR);
+
+    computation_timer.stop_timer();
+
+    computation_timer.start_timer("generate_data_structures");
+
+    generateDatastructures(aAPR);
+
+    computation_timer.stop_timer();
+
+    total_timer.stop_timer();
+
     return true;
 }
 
+
+/**
+ * Main method for constructing the APR from an input image
+ *
+ * @param aAPR - the APR data structure
+ * @param input_image - input image
+ */
+template<typename ImageType> template<typename T>
+inline bool APRConverter<ImageType>::get_apr(APR &aAPR, PixelData<T> &input_image) {
+// TODO: CUDA pipeline is temporarily turned off and CPU version is always chosen.
+//       After revising a CUDA pipeline remove "#if true // " part.
+#if true // #ifndef APR_USE_CUDA
+    return get_apr_cpu(aAPR, input_image);
+#else
+    return get_apr_cuda(aAPR, input_image);
+#endif
+}
+
+
 template<typename T>
 void compute_means(const std::vector<T>& data, float threshold, float& mean_back, float& mean_fore) {
     float sum_fore=0.f, sum_back=0.f;

diff --git a/src/algorithm/APRParameters.hpp b/src/algorithm/APRParameters.hpp
@@ -33,7 +33,6 @@ class APRParameters {
 
     // additional pipeline parameters
     bool reflect_bc_lis = true;
-    int extra_smooth = 0;
     bool check_input = false;
     bool swap_dimensions = false;
     bool neighborhood_optimization = true;

diff --git a/src/algorithm/LocalIntensityScale.hpp b/src/algorithm/LocalIntensityScale.hpp
@@ -109,19 +109,6 @@ void get_local_intensity_scale(PixelData<float> &local_scale_temp, PixelData<flo
             calc_sat_mean_z(local_scale_temp, win_z2);
         }
 
-        // second average for extra smoothing
-        if(par.extra_smooth) {
-            if (active_y) {
-                calc_sat_mean_y(local_scale_temp, par.extra_smooth);
-            }
-            if (active_x) {
-                calc_sat_mean_x(local_scale_temp, par.extra_smooth);
-            }
-            if (active_z) {
-                calc_sat_mean_z(local_scale_temp, par.extra_smooth);
-            }
-        }
-
         rescale_var(local_scale_temp, var_rescale);
         timer.stop_timer();
 

diff --git a/src/data_structures/APR/access/GPUAccess.cu b/src/data_structures/APR/access/GPUAccess.cu
@@ -157,6 +157,10 @@ template class ParticleDataGpu<float>;
 template class ParticleDataGpu<double>;
 template class ParticleDataGpu<int>;
 template class ParticleDataGpu<uint64_t>;
+template class ParticleDataGpu<uint32_t>;
+template class ParticleDataGpu<int8_t>;
+template class ParticleDataGpu<int16_t>;
+template class ParticleDataGpu<int64_t>;
 
 __global__ void fill_y_vec_max_level(const uint64_t* level_xz_vec,
                                      const uint64_t* xz_end_vec,

diff --git a/test/APRTest.cpp b/test/APRTest.cpp
@@ -2879,7 +2879,7 @@ bool test_pipeline_u16(TestData& test_data){
     }
 
     APR apr_c;
-    aprConverter.initPipelineAPR(apr_c, test_data.img_original.y_num, test_data.img_original.x_num, test_data.img_original.z_num);
+    aprConverter.initPipelineAPR(apr_c, test_data.img_original);
 
     aprConverter.get_apr_custom_grad_scale(apr_c,gradient_saved,scale_saved);