L1 HSGS (#927)

This PR provides a convergent l1-hybrid symmetric Gauss-Seidel (HSGS) method.
hypre-space · Sep 5, 2023 · 36ab29b · 36ab29b
1 parent cd8f9c3
commit 36ab29b
Show file tree

Hide file tree

Showing 30 changed files with 51,554 additions and 23 deletions.
diff --git a/src/parcsr_ls/HYPRE_parcsr_ls.h b/src/parcsr_ls/HYPRE_parcsr_ls.h
@@ -741,14 +741,32 @@ HYPRE_Int HYPRE_BoomerAMGSetGridRelaxType(HYPRE_Solver  solver,
  *    - 4  : hybrid Gauss-Seidel or SOR, backward solve
  *    - 5  : hybrid chaotic Gauss-Seidel (works only with OpenMP)
  *    - 6  : hybrid symmetric Gauss-Seidel or SSOR
+ *    - 7  : Jacobi (uses Matvec)
  *    - 8  : \f$\ell_1\f$-scaled hybrid symmetric Gauss-Seidel
  *    - 9  : Gaussian elimination (only on coarsest level)
+ *    - 10 : On-processor direct forward solve for matrices with
+ *           triangular structure
+ *    - 11 : Two Stage approximation to GS. Uses the strict lower
+ *           part of the diagonal matrix
+ *    - 12 : Two Stage approximation to GS. Uses the strict lower
+ *           part of the diagonal matrix and a second iteration
+ *           for additional error approximation
  *    - 13 : \f$\ell_1\f$ Gauss-Seidel, forward solve
  *    - 14 : \f$\ell_1\f$ Gauss-Seidel, backward solve
  *    - 15 : CG (warning - not a fixed smoother - may require FGMRES)
  *    - 16 : Chebyshev
  *    - 17 : FCF-Jacobi
  *    - 18 : \f$\ell_1\f$-scaled jacobi
+ *    - 19 : Gaussian elimination (old version)
+ *    - 21 : The same as 8 except forcing serialization on CPU (#OMP-thread = 1)
+ *    - 29 : Direct solve: use Gaussian elimination & BLAS
+ *                        (with pivoting) (old version)
+ *    - 30 : Kaczmarz
+ *    - 88:  The same methods as 8 with a convergent l1-term
+ *    - 89:  Symmetric l1-hybrid Gauss-Seidel (i.e., 13 followed by 14)
+ *    - 98 : LU with pivoting
+ *    - 99 : LU with pivoting
+ *    -199 : Matvec with the inverse
  **/
 HYPRE_Int HYPRE_BoomerAMGSetRelaxType(HYPRE_Solver  solver,
                                       HYPRE_Int     relax_type);

diff --git a/src/parcsr_ls/_hypre_parcsr_ls.h b/src/parcsr_ls/_hypre_parcsr_ls.h
@@ -3159,6 +3159,14 @@ HYPRE_Int hypre_BoomerAMGRelax18WeightedL1Jacobi( hypre_ParCSRMatrix *A, hypre_P
 HYPRE_Int hypre_BoomerAMGRelax19GaussElim( hypre_ParCSRMatrix *A, hypre_ParVector *f,
                                            hypre_ParVector *u );
 
+HYPRE_Int hypre_BoomerAMGRelax88HybridL1SSOR( hypre_ParCSRMatrix *A, hypre_ParVector *f,
+                                              HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega,
+                                              HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
+
+HYPRE_Int hypre_BoomerAMGRelax89HybridL1SSOR( hypre_ParCSRMatrix *A, hypre_ParVector *f,
+                                              HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega,
+                                              HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
+
 HYPRE_Int hypre_BoomerAMGRelax98GaussElimPivot( hypre_ParCSRMatrix *A, hypre_ParVector *f,
                                                 hypre_ParVector *u );
 

diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c
@@ -503,6 +503,21 @@ struct l1_norm_op1 : public thrust::binary_function<HYPRE_Complex, HYPRE_Complex
 };
 #endif
 
+#if defined(HYPRE_USING_GPU)
+#if defined(HYPRE_USING_SYCL)
+struct l1_norm_op6
+#else
+struct l1_norm_op6 : public thrust::binary_function<HYPRE_Complex, HYPRE_Complex, HYPRE_Complex>
+#endif
+{
+   __host__ __device__
+   HYPRE_Complex operator()(const HYPRE_Complex &d, const HYPRE_Complex &l) const
+   {
+      return (l + d + sqrt(l * l + d * d)) * 0.5;
+   }
+};
+#endif
+
 HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix  *A,
                                      HYPRE_Int            option,
                                      HYPRE_Int           *cf_marker,
@@ -700,6 +715,34 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix  *A,
 
       return hypre_error_flag;
    }
+   else if (option == 6)
+   {
+      /* Set the abs(diag) element */
+      hypre_CSRMatrixExtractDiagonal(A_diag, l1_norm, 1);
+      /* Add the scaled l1 norm of the offd part */
+      if (num_cols_offd)
+      {
+         diag_tmp = hypre_TAlloc(HYPRE_Real, num_rows, memory_location_tmp);
+         hypre_CSRMatrixComputeRowSum(A_offd, cf_marker, cf_marker_offd, diag_tmp, 1, 1.0, "set");
+#if defined(HYPRE_USING_GPU)
+         if (exec == HYPRE_EXEC_DEVICE)
+         {
+#if defined(HYPRE_USING_SYCL)
+            HYPRE_ONEDPL_CALL( std::transform, l1_norm, l1_norm + num_rows, diag_tmp, l1_norm, l1_norm_op6() );
+#else
+            HYPRE_THRUST_CALL( transform, l1_norm, l1_norm + num_rows, diag_tmp, l1_norm, l1_norm_op6() );
+#endif
+         }
+         else
+#endif
+         {
+            for (i = 0; i < num_rows; i++)
+            {
+               l1_norm[i] = (diag_tmp[i] + l1_norm[i] + hypre_sqrt(diag_tmp[i] * diag_tmp[i] + l1_norm[i] * l1_norm[i])) * 0.5;
+            }
+         }
+      }
+   }
 
    /* Handle negative definite matrices */
    if (!diag_tmp)
@@ -4705,7 +4748,6 @@ HYPRE_Int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
             }
          }
       }
-
       else if (option == 5) /*stores diagonal of A for Jacobi using matvec, rlx 7 */
       {
          /* Set the diag element */
@@ -4715,6 +4757,77 @@ HYPRE_Int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
             if (l1_norm[i] == 0) { l1_norm[i] = 1.0; }
          }
       }
+      else if (option == 6)
+      {
+         for (i = ns; i < ne; i++)
+         {
+            l1_norm[i] = 0.0;
+
+            if (cf_marker == NULL)
+            {
+               /* Add the diagonal and the local off-thread part of the ith row */
+               for (j = A_diag_I[i]; j < A_diag_I[i + 1]; j++)
+               {
+                  ii = A_diag_J[j];
+                  if (ii == i || ii < ns || ii >= ne)
+                  {
+                     if (ii == i)
+                     {
+                        diag = hypre_abs(A_diag_data[j]);
+                     }
+                     else
+                     {
+                        l1_norm[i] += 0.5 * hypre_abs(A_diag_data[j]);
+                     }
+                  }
+               }
+               /* Add the l1 norm of the offd part of the ith row */
+               if (num_cols_offd)
+               {
+                  for (j = A_offd_I[i]; j < A_offd_I[i + 1]; j++)
+                  {
+                     l1_norm[i] += 0.5 * hypre_abs(A_offd_data[j]);
+                  }
+               }
+
+               l1_norm[i] = (diag + l1_norm[i] + hypre_sqrt(diag * diag + l1_norm[i] * l1_norm[i])) * 0.5;
+            }
+            else
+            {
+               cf_diag = cf_marker[i];
+               /* Add the diagonal and the local off-thread part of the ith row */
+               for (j = A_diag_I[i]; j < A_diag_I[i + 1]; j++)
+               {
+                  ii = A_diag_J[j];
+                  if ((ii == i || ii < ns || ii >= ne) &&
+                      (cf_diag == cf_marker[A_diag_J[j]]))
+                  {
+                     if (ii == i)
+                     {
+                        diag = hypre_abs(A_diag_data[j]);
+                     }
+                     else
+                     {
+                        l1_norm[i] += 0.5 * hypre_abs(A_diag_data[j]);
+                     }
+                  }
+               }
+               /* Add the CF l1 norm of the offd part of the ith row */
+               if (num_cols_offd)
+               {
+                  for (j = A_offd_I[i]; j < A_offd_I[i + 1]; j++)
+                  {
+                     if (cf_diag == cf_marker_offd[A_offd_J[j]])
+                     {
+                        l1_norm[i] += 0.5 * hypre_abs(A_offd_data[j]);
+                     }
+                  }
+               }
+
+               l1_norm[i] = (diag + l1_norm[i] + hypre_sqrt(diag * diag + l1_norm[i] * l1_norm[i])) * 0.5;
+            }
+         }
+      }
 
       if (option < 5)
       {

diff --git a/src/parcsr_ls/par_amg_setup.c b/src/parcsr_ls/par_amg_setup.c
@@ -364,16 +364,16 @@ hypre_BoomerAMGSetup( void               *amg_vdata,
                         "WARNING: Changing to node-based coarsening because LN of GM interpolation has been specified via HYPRE_BoomerAMGSetInterpVecVariant.\n");
    }
 
-   /* Verify that settings are correct for solving systmes */
+   /* Verify that settings are correct for solving systems */
    /* If the user has specified either a block interpolation or a block relaxation then
-      we need to make sure the other has been choosen as well  - so we can be
+      we need to make sure the other has been chosen as well  - so we can be
       in "block mode" - storing only block matrices on the coarse levels*/
    /* Furthermore, if we are using systems and nodal = 0, then
       we will change nodal to 1 */
-   /* probably should disable stuff like smooth num levels at some point */
+   /* probably should disable stuff like smooth number levels at some point */
 
 
-   if (grid_relax_type[0] >= 20) /* block relaxation choosen */
+   if (grid_relax_type[0] >= 20 && grid_relax_type[0] != 30 && grid_relax_type[0] != 88 && grid_relax_type[0] != 89) /* block relaxation chosen */
    {
 
       if (!((interp_type >= 20 && interp_type != 100) || interp_type == 11 || interp_type == 10 ) )
@@ -881,7 +881,8 @@ hypre_BoomerAMGSetup( void               *amg_vdata,
       {
          if (grid_relax_type[j] ==  3 || grid_relax_type[j] ==  4 || grid_relax_type[j] ==  6 ||
              grid_relax_type[j] ==  8 || grid_relax_type[j] == 13 || grid_relax_type[j] == 14 ||
-             grid_relax_type[j] == 11 || grid_relax_type[j] == 12)
+             grid_relax_type[j] == 11 || grid_relax_type[j] == 12 || grid_relax_type[j] == 88 ||
+             grid_relax_type[j] == 89)
          {
             needZ = hypre_max(needZ, 1);
             break;
@@ -3240,7 +3241,10 @@ hypre_BoomerAMGSetup( void               *amg_vdata,
        grid_relax_type[1] == 12 || grid_relax_type[2] == 12 || grid_relax_type[3] == 12 ||
        grid_relax_type[1] == 13 || grid_relax_type[2] == 13 || grid_relax_type[3] == 13 ||
        grid_relax_type[1] == 14 || grid_relax_type[2] == 14 || grid_relax_type[3] == 14 ||
-       grid_relax_type[1] == 18 || grid_relax_type[2] == 18 || grid_relax_type[3] == 18)
+       grid_relax_type[1] == 18 || grid_relax_type[2] == 18 || grid_relax_type[3] == 18 ||
+       grid_relax_type[1] == 30 || grid_relax_type[2] == 30 || grid_relax_type[3] == 30 ||
+       grid_relax_type[1] == 88 || grid_relax_type[2] == 88 || grid_relax_type[3] == 88 ||
+       grid_relax_type[1] == 89 || grid_relax_type[2] == 89 || grid_relax_type[3] == 89)
    {
       l1_norms = hypre_CTAlloc(hypre_Vector*, num_levels, HYPRE_MEMORY_HOST);
       hypre_ParAMGDataL1Norms(amg_data) = l1_norms;
@@ -3286,8 +3290,8 @@ hypre_BoomerAMGSetup( void               *amg_vdata,
       hypre_GpuProfilingPushRange(nvtx_name);
 
       if (j < num_levels - 1 &&
-          (grid_relax_type[1] == 8 || grid_relax_type[1] == 13 || grid_relax_type[1] == 14 ||
-           grid_relax_type[2] == 8 || grid_relax_type[2] == 13 || grid_relax_type[2] == 14))
+          (grid_relax_type[1] == 8 || grid_relax_type[1] == 89 || grid_relax_type[1] == 13 || grid_relax_type[1] == 14 ||
+           grid_relax_type[2] == 8 || grid_relax_type[2] == 89 || grid_relax_type[2] == 13 || grid_relax_type[2] == 14))
       {
          if (relax_order)
          {
@@ -3299,11 +3303,43 @@ hypre_BoomerAMGSetup( void               *amg_vdata,
          }
       }
       else if (j == num_levels - 1 &&
-               (grid_relax_type[3] == 8 || grid_relax_type[3] == 13 || grid_relax_type[3] == 14))
+               (grid_relax_type[3] == 8 || grid_relax_type[3] == 89 || grid_relax_type[3] == 13 || grid_relax_type[3] == 14))
       {
          hypre_ParCSRComputeL1Norms(A_array[j], 4, NULL, &l1_norm_data);
       }
 
+      if (j < num_levels - 1 && (grid_relax_type[1] == 30 || grid_relax_type[2] == 30))
+      {
+         if (relax_order)
+         {
+            hypre_ParCSRComputeL1Norms(A_array[j], 3, hypre_IntArrayData(CF_marker_array[j]), &l1_norm_data);
+         }
+         else
+         {
+            hypre_ParCSRComputeL1Norms(A_array[j], 3, NULL, &l1_norm_data);
+         }
+      }
+      else if (j == num_levels - 1 && grid_relax_type[3] == 30)
+      {
+         hypre_ParCSRComputeL1Norms(A_array[j], 3, NULL, &l1_norm_data);
+      }
+
+      if (j < num_levels - 1 && (grid_relax_type[1] == 88 || grid_relax_type[2] == 88 ))
+      {
+         if (relax_order)
+         {
+            hypre_ParCSRComputeL1Norms(A_array[j], 6, hypre_IntArrayData(CF_marker_array[j]), &l1_norm_data);
+         }
+         else
+         {
+            hypre_ParCSRComputeL1Norms(A_array[j], 6, NULL, &l1_norm_data);
+         }
+      }
+      else if (j == num_levels - 1 && (grid_relax_type[3] == 88))
+      {
+         hypre_ParCSRComputeL1Norms(A_array[j], 6, NULL, &l1_norm_data);
+      }
+
       if (j < num_levels - 1 && (grid_relax_type[1] == 18 || grid_relax_type[2] == 18))
       {
          if (relax_order)

diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c
@@ -67,7 +67,7 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
     *     relax_type = 17 -> FCF-Jacobi
     *     relax_type = 18 -> L1-Jacobi [GPU-supported through call to relax7Jacobi]
     *     relax_type = 19 -> Direct Solve, (old version)
-    *     relax_type = 20 -> Kaczmarz
+    *     relax_type = 30 -> Kaczmarz
     *     relax_type = 21 -> the same as 8 except forcing serialization on CPU (#OMP-thread = 1)
     *     relax_type = 29 -> Direct solve: use Gaussian elimination & BLAS
     *                        (with pivoting) (old version)
@@ -119,7 +119,8 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
                                      relax_weight, l1_norms, u, Vtemp);
          break;
 
-      case 8: /* hybrid L1 Symm. Gauss-Seidel */
+      case 8: /* L1 hybrid Symm. Gauss-Seidel */
+      case 88: /* L1 hybrid Symm. Gauss-Seidel (with a convergent l1 term) */
          hypre_BoomerAMGRelax8HybridL1SSOR(A, f, cf_marker, relax_points,
                                            relax_weight, omega, l1_norms, u,
                                            Vtemp, Ztemp);
@@ -166,10 +167,16 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
          relax_error = hypre_BoomerAMGRelax19GaussElim(A, f, u);
          break;
 
-      case 20: /* Kaczmarz */
+      case 30: /* Kaczmarz */
          hypre_BoomerAMGRelaxKaczmarz(A, f, omega, l1_norms, u);
          break;
 
+      case 89: /* L1 Symm. hybrid Gauss-Seidel */
+         hypre_BoomerAMGRelax89HybridL1SSOR(A, f, cf_marker, relax_points,
+                                            relax_weight, omega, l1_norms, u,
+                                            Vtemp, Ztemp);
+         break;
+
       case 98: /* Direct solve: use gaussian elimination & BLAS (with pivoting) */
          relax_error = hypre_BoomerAMGRelax98GaussElimPivot(A, f, u);
          break;
@@ -1196,25 +1203,47 @@ hypre_BoomerAMGRelax7Jacobi( hypre_ParCSRMatrix *A,
    return hypre_error_flag;
 }
 
-/* symmetric l1 hybrid G-S */
+/* l1 hybrid symmetric G-S */
 HYPRE_Int
 hypre_BoomerAMGRelax8HybridL1SSOR( hypre_ParCSRMatrix *A,
-                                   hypre_ParVector    *f,
-                                   HYPRE_Int          *cf_marker,
-                                   HYPRE_Int           relax_points,
-                                   HYPRE_Real          relax_weight,
-                                   HYPRE_Real          omega,
-                                   HYPRE_Real         *l1_norms,
-                                   hypre_ParVector    *u,
-                                   hypre_ParVector    *Vtemp,
-                                   hypre_ParVector    *Ztemp )
+                                    hypre_ParVector    *f,
+                                    HYPRE_Int          *cf_marker,
+                                    HYPRE_Int           relax_points,
+                                    HYPRE_Real          relax_weight,
+                                    HYPRE_Real          omega,
+                                    HYPRE_Real         *l1_norms,
+                                    hypre_ParVector    *u,
+                                    hypre_ParVector    *Vtemp,
+                                    hypre_ParVector    *Ztemp )
 {
    const HYPRE_Int skip_diag = relax_weight == 1.0 && omega == 1.0 ? 0 : 1;
 
    return hypre_BoomerAMGRelaxHybridSOR(A, f, cf_marker, relax_points, relax_weight,
                                         omega, l1_norms, u, Vtemp, Ztemp, 1, 1, skip_diag, 0);
 }
 
+/* l1 symmetric hybrid G-S */
+HYPRE_Int
+hypre_BoomerAMGRelax89HybridL1SSOR( hypre_ParCSRMatrix *A,
+                                    hypre_ParVector    *f,
+                                    HYPRE_Int          *cf_marker,
+                                    HYPRE_Int           relax_points,
+                                    HYPRE_Real          relax_weight,
+                                    HYPRE_Real          omega,
+                                    HYPRE_Real         *l1_norms,
+                                    hypre_ParVector    *u,
+                                    hypre_ParVector    *Vtemp,
+                                    hypre_ParVector    *Ztemp )
+{
+   hypre_BoomerAMGRelax13HybridL1GaussSeidel(A, f, cf_marker, relax_points, relax_weight,
+                                             omega, l1_norms, u, Vtemp, Ztemp);
+
+   hypre_BoomerAMGRelax14HybridL1GaussSeidel(A, f, cf_marker, relax_points, relax_weight,
+                                             omega, l1_norms, u, Vtemp, Ztemp);
+
+   return hypre_error_flag;
+}
+
 /* forward hybrid topology ordered G-S */
 HYPRE_Int
 hypre_BoomerAMGRelax10TopoOrderedGaussSeidel( hypre_ParCSRMatrix *A,
@@ -1357,6 +1386,7 @@ hypre_BoomerAMGRelax19GaussElim( hypre_ParCSRMatrix *A,
    return relax_error;
 }
 
+
 HYPRE_Int
 hypre_BoomerAMGRelax98GaussElimPivot( hypre_ParCSRMatrix *A,
                                       hypre_ParVector    *f,