Skip to content

Commit

Permalink
L1 HSGS (#927)
Browse files Browse the repository at this point in the history
This PR provides a convergent l1-hybrid symmetric Gauss-Seidel (HSGS) method.
  • Loading branch information
liruipeng authored Sep 5, 2023
1 parent cd8f9c3 commit 36ab29b
Show file tree
Hide file tree
Showing 30 changed files with 51,554 additions and 23 deletions.
18 changes: 18 additions & 0 deletions src/parcsr_ls/HYPRE_parcsr_ls.h
Original file line number Diff line number Diff line change
Expand Up @@ -741,14 +741,32 @@ HYPRE_Int HYPRE_BoomerAMGSetGridRelaxType(HYPRE_Solver solver,
* - 4 : hybrid Gauss-Seidel or SOR, backward solve
* - 5 : hybrid chaotic Gauss-Seidel (works only with OpenMP)
* - 6 : hybrid symmetric Gauss-Seidel or SSOR
* - 7 : Jacobi (uses Matvec)
* - 8 : \f$\ell_1\f$-scaled hybrid symmetric Gauss-Seidel
* - 9 : Gaussian elimination (only on coarsest level)
* - 10 : On-processor direct forward solve for matrices with
* triangular structure
* - 11 : Two Stage approximation to GS. Uses the strict lower
* part of the diagonal matrix
* - 12 : Two Stage approximation to GS. Uses the strict lower
* part of the diagonal matrix and a second iteration
* for additional error approximation
* - 13 : \f$\ell_1\f$ Gauss-Seidel, forward solve
* - 14 : \f$\ell_1\f$ Gauss-Seidel, backward solve
* - 15 : CG (warning - not a fixed smoother - may require FGMRES)
* - 16 : Chebyshev
* - 17 : FCF-Jacobi
* - 18 : \f$\ell_1\f$-scaled jacobi
* - 19 : Gaussian elimination (old version)
* - 21 : The same as 8 except forcing serialization on CPU (#OMP-thread = 1)
* - 29 : Direct solve: use Gaussian elimination & BLAS
* (with pivoting) (old version)
* - 30 : Kaczmarz
* - 88: The same methods as 8 with a convergent l1-term
* - 89: Symmetric l1-hybrid Gauss-Seidel (i.e., 13 followed by 14)
* - 98 : LU with pivoting
* - 99 : LU with pivoting
* -199 : Matvec with the inverse
**/
HYPRE_Int HYPRE_BoomerAMGSetRelaxType(HYPRE_Solver solver,
HYPRE_Int relax_type);
Expand Down
8 changes: 8 additions & 0 deletions src/parcsr_ls/_hypre_parcsr_ls.h
Original file line number Diff line number Diff line change
Expand Up @@ -3159,6 +3159,14 @@ HYPRE_Int hypre_BoomerAMGRelax18WeightedL1Jacobi( hypre_ParCSRMatrix *A, hypre_P
HYPRE_Int hypre_BoomerAMGRelax19GaussElim( hypre_ParCSRMatrix *A, hypre_ParVector *f,
hypre_ParVector *u );

HYPRE_Int hypre_BoomerAMGRelax88HybridL1SSOR( hypre_ParCSRMatrix *A, hypre_ParVector *f,
HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega,
HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );

HYPRE_Int hypre_BoomerAMGRelax89HybridL1SSOR( hypre_ParCSRMatrix *A, hypre_ParVector *f,
HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega,
HYPRE_Real *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );

HYPRE_Int hypre_BoomerAMGRelax98GaussElimPivot( hypre_ParCSRMatrix *A, hypre_ParVector *f,
hypre_ParVector *u );

Expand Down
115 changes: 114 additions & 1 deletion src/parcsr_ls/ams.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,21 @@ struct l1_norm_op1 : public thrust::binary_function<HYPRE_Complex, HYPRE_Complex
};
#endif

#if defined(HYPRE_USING_GPU)
#if defined(HYPRE_USING_SYCL)
struct l1_norm_op6
#else
struct l1_norm_op6 : public thrust::binary_function<HYPRE_Complex, HYPRE_Complex, HYPRE_Complex>
#endif
{
__host__ __device__
HYPRE_Complex operator()(const HYPRE_Complex &d, const HYPRE_Complex &l) const
{
return (l + d + sqrt(l * l + d * d)) * 0.5;
}
};
#endif

HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
HYPRE_Int option,
HYPRE_Int *cf_marker,
Expand Down Expand Up @@ -700,6 +715,34 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,

return hypre_error_flag;
}
else if (option == 6)
{
/* Set the abs(diag) element */
hypre_CSRMatrixExtractDiagonal(A_diag, l1_norm, 1);
/* Add the scaled l1 norm of the offd part */
if (num_cols_offd)
{
diag_tmp = hypre_TAlloc(HYPRE_Real, num_rows, memory_location_tmp);
hypre_CSRMatrixComputeRowSum(A_offd, cf_marker, cf_marker_offd, diag_tmp, 1, 1.0, "set");
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
#if defined(HYPRE_USING_SYCL)
HYPRE_ONEDPL_CALL( std::transform, l1_norm, l1_norm + num_rows, diag_tmp, l1_norm, l1_norm_op6() );
#else
HYPRE_THRUST_CALL( transform, l1_norm, l1_norm + num_rows, diag_tmp, l1_norm, l1_norm_op6() );
#endif
}
else
#endif
{
for (i = 0; i < num_rows; i++)
{
l1_norm[i] = (diag_tmp[i] + l1_norm[i] + hypre_sqrt(diag_tmp[i] * diag_tmp[i] + l1_norm[i] * l1_norm[i])) * 0.5;
}
}
}
}

/* Handle negative definite matrices */
if (!diag_tmp)
Expand Down Expand Up @@ -4705,7 +4748,6 @@ HYPRE_Int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
}
}
}

else if (option == 5) /*stores diagonal of A for Jacobi using matvec, rlx 7 */
{
/* Set the diag element */
Expand All @@ -4715,6 +4757,77 @@ HYPRE_Int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
if (l1_norm[i] == 0) { l1_norm[i] = 1.0; }
}
}
else if (option == 6)
{
for (i = ns; i < ne; i++)
{
l1_norm[i] = 0.0;

if (cf_marker == NULL)
{
/* Add the diagonal and the local off-thread part of the ith row */
for (j = A_diag_I[i]; j < A_diag_I[i + 1]; j++)
{
ii = A_diag_J[j];
if (ii == i || ii < ns || ii >= ne)
{
if (ii == i)
{
diag = hypre_abs(A_diag_data[j]);
}
else
{
l1_norm[i] += 0.5 * hypre_abs(A_diag_data[j]);
}
}
}
/* Add the l1 norm of the offd part of the ith row */
if (num_cols_offd)
{
for (j = A_offd_I[i]; j < A_offd_I[i + 1]; j++)
{
l1_norm[i] += 0.5 * hypre_abs(A_offd_data[j]);
}
}

l1_norm[i] = (diag + l1_norm[i] + hypre_sqrt(diag * diag + l1_norm[i] * l1_norm[i])) * 0.5;
}
else
{
cf_diag = cf_marker[i];
/* Add the diagonal and the local off-thread part of the ith row */
for (j = A_diag_I[i]; j < A_diag_I[i + 1]; j++)
{
ii = A_diag_J[j];
if ((ii == i || ii < ns || ii >= ne) &&
(cf_diag == cf_marker[A_diag_J[j]]))
{
if (ii == i)
{
diag = hypre_abs(A_diag_data[j]);
}
else
{
l1_norm[i] += 0.5 * hypre_abs(A_diag_data[j]);
}
}
}
/* Add the CF l1 norm of the offd part of the ith row */
if (num_cols_offd)
{
for (j = A_offd_I[i]; j < A_offd_I[i + 1]; j++)
{
if (cf_diag == cf_marker_offd[A_offd_J[j]])
{
l1_norm[i] += 0.5 * hypre_abs(A_offd_data[j]);
}
}
}

l1_norm[i] = (diag + l1_norm[i] + hypre_sqrt(diag * diag + l1_norm[i] * l1_norm[i])) * 0.5;
}
}
}

if (option < 5)
{
Expand Down
54 changes: 45 additions & 9 deletions src/parcsr_ls/par_amg_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,16 +364,16 @@ hypre_BoomerAMGSetup( void *amg_vdata,
"WARNING: Changing to node-based coarsening because LN of GM interpolation has been specified via HYPRE_BoomerAMGSetInterpVecVariant.\n");
}

/* Verify that settings are correct for solving systmes */
/* Verify that settings are correct for solving systems */
/* If the user has specified either a block interpolation or a block relaxation then
we need to make sure the other has been choosen as well - so we can be
we need to make sure the other has been chosen as well - so we can be
in "block mode" - storing only block matrices on the coarse levels*/
/* Furthermore, if we are using systems and nodal = 0, then
we will change nodal to 1 */
/* probably should disable stuff like smooth num levels at some point */
/* probably should disable stuff like smooth number levels at some point */


if (grid_relax_type[0] >= 20) /* block relaxation choosen */
if (grid_relax_type[0] >= 20 && grid_relax_type[0] != 30 && grid_relax_type[0] != 88 && grid_relax_type[0] != 89) /* block relaxation chosen */
{

if (!((interp_type >= 20 && interp_type != 100) || interp_type == 11 || interp_type == 10 ) )
Expand Down Expand Up @@ -881,7 +881,8 @@ hypre_BoomerAMGSetup( void *amg_vdata,
{
if (grid_relax_type[j] == 3 || grid_relax_type[j] == 4 || grid_relax_type[j] == 6 ||
grid_relax_type[j] == 8 || grid_relax_type[j] == 13 || grid_relax_type[j] == 14 ||
grid_relax_type[j] == 11 || grid_relax_type[j] == 12)
grid_relax_type[j] == 11 || grid_relax_type[j] == 12 || grid_relax_type[j] == 88 ||
grid_relax_type[j] == 89)
{
needZ = hypre_max(needZ, 1);
break;
Expand Down Expand Up @@ -3240,7 +3241,10 @@ hypre_BoomerAMGSetup( void *amg_vdata,
grid_relax_type[1] == 12 || grid_relax_type[2] == 12 || grid_relax_type[3] == 12 ||
grid_relax_type[1] == 13 || grid_relax_type[2] == 13 || grid_relax_type[3] == 13 ||
grid_relax_type[1] == 14 || grid_relax_type[2] == 14 || grid_relax_type[3] == 14 ||
grid_relax_type[1] == 18 || grid_relax_type[2] == 18 || grid_relax_type[3] == 18)
grid_relax_type[1] == 18 || grid_relax_type[2] == 18 || grid_relax_type[3] == 18 ||
grid_relax_type[1] == 30 || grid_relax_type[2] == 30 || grid_relax_type[3] == 30 ||
grid_relax_type[1] == 88 || grid_relax_type[2] == 88 || grid_relax_type[3] == 88 ||
grid_relax_type[1] == 89 || grid_relax_type[2] == 89 || grid_relax_type[3] == 89)
{
l1_norms = hypre_CTAlloc(hypre_Vector*, num_levels, HYPRE_MEMORY_HOST);
hypre_ParAMGDataL1Norms(amg_data) = l1_norms;
Expand Down Expand Up @@ -3286,8 +3290,8 @@ hypre_BoomerAMGSetup( void *amg_vdata,
hypre_GpuProfilingPushRange(nvtx_name);

if (j < num_levels - 1 &&
(grid_relax_type[1] == 8 || grid_relax_type[1] == 13 || grid_relax_type[1] == 14 ||
grid_relax_type[2] == 8 || grid_relax_type[2] == 13 || grid_relax_type[2] == 14))
(grid_relax_type[1] == 8 || grid_relax_type[1] == 89 || grid_relax_type[1] == 13 || grid_relax_type[1] == 14 ||
grid_relax_type[2] == 8 || grid_relax_type[2] == 89 || grid_relax_type[2] == 13 || grid_relax_type[2] == 14))
{
if (relax_order)
{
Expand All @@ -3299,11 +3303,43 @@ hypre_BoomerAMGSetup( void *amg_vdata,
}
}
else if (j == num_levels - 1 &&
(grid_relax_type[3] == 8 || grid_relax_type[3] == 13 || grid_relax_type[3] == 14))
(grid_relax_type[3] == 8 || grid_relax_type[3] == 89 || grid_relax_type[3] == 13 || grid_relax_type[3] == 14))
{
hypre_ParCSRComputeL1Norms(A_array[j], 4, NULL, &l1_norm_data);
}

if (j < num_levels - 1 && (grid_relax_type[1] == 30 || grid_relax_type[2] == 30))
{
if (relax_order)
{
hypre_ParCSRComputeL1Norms(A_array[j], 3, hypre_IntArrayData(CF_marker_array[j]), &l1_norm_data);
}
else
{
hypre_ParCSRComputeL1Norms(A_array[j], 3, NULL, &l1_norm_data);
}
}
else if (j == num_levels - 1 && grid_relax_type[3] == 30)
{
hypre_ParCSRComputeL1Norms(A_array[j], 3, NULL, &l1_norm_data);
}

if (j < num_levels - 1 && (grid_relax_type[1] == 88 || grid_relax_type[2] == 88 ))
{
if (relax_order)
{
hypre_ParCSRComputeL1Norms(A_array[j], 6, hypre_IntArrayData(CF_marker_array[j]), &l1_norm_data);
}
else
{
hypre_ParCSRComputeL1Norms(A_array[j], 6, NULL, &l1_norm_data);
}
}
else if (j == num_levels - 1 && (grid_relax_type[3] == 88))
{
hypre_ParCSRComputeL1Norms(A_array[j], 6, NULL, &l1_norm_data);
}

if (j < num_levels - 1 && (grid_relax_type[1] == 18 || grid_relax_type[2] == 18))
{
if (relax_order)
Expand Down
56 changes: 43 additions & 13 deletions src/parcsr_ls/par_relax.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
* relax_type = 17 -> FCF-Jacobi
* relax_type = 18 -> L1-Jacobi [GPU-supported through call to relax7Jacobi]
* relax_type = 19 -> Direct Solve, (old version)
* relax_type = 20 -> Kaczmarz
* relax_type = 30 -> Kaczmarz
* relax_type = 21 -> the same as 8 except forcing serialization on CPU (#OMP-thread = 1)
* relax_type = 29 -> Direct solve: use Gaussian elimination & BLAS
* (with pivoting) (old version)
Expand Down Expand Up @@ -119,7 +119,8 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
relax_weight, l1_norms, u, Vtemp);
break;

case 8: /* hybrid L1 Symm. Gauss-Seidel */
case 8: /* L1 hybrid Symm. Gauss-Seidel */
case 88: /* L1 hybrid Symm. Gauss-Seidel (with a convergent l1 term) */
hypre_BoomerAMGRelax8HybridL1SSOR(A, f, cf_marker, relax_points,
relax_weight, omega, l1_norms, u,
Vtemp, Ztemp);
Expand Down Expand Up @@ -166,10 +167,16 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
relax_error = hypre_BoomerAMGRelax19GaussElim(A, f, u);
break;

case 20: /* Kaczmarz */
case 30: /* Kaczmarz */
hypre_BoomerAMGRelaxKaczmarz(A, f, omega, l1_norms, u);
break;

case 89: /* L1 Symm. hybrid Gauss-Seidel */
hypre_BoomerAMGRelax89HybridL1SSOR(A, f, cf_marker, relax_points,
relax_weight, omega, l1_norms, u,
Vtemp, Ztemp);
break;

case 98: /* Direct solve: use gaussian elimination & BLAS (with pivoting) */
relax_error = hypre_BoomerAMGRelax98GaussElimPivot(A, f, u);
break;
Expand Down Expand Up @@ -1196,25 +1203,47 @@ hypre_BoomerAMGRelax7Jacobi( hypre_ParCSRMatrix *A,
return hypre_error_flag;
}

/* symmetric l1 hybrid G-S */
/* l1 hybrid symmetric G-S */
HYPRE_Int
hypre_BoomerAMGRelax8HybridL1SSOR( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
HYPRE_Int *cf_marker,
HYPRE_Int relax_points,
HYPRE_Real relax_weight,
HYPRE_Real omega,
HYPRE_Real *l1_norms,
hypre_ParVector *u,
hypre_ParVector *Vtemp,
hypre_ParVector *Ztemp )
hypre_ParVector *f,
HYPRE_Int *cf_marker,
HYPRE_Int relax_points,
HYPRE_Real relax_weight,
HYPRE_Real omega,
HYPRE_Real *l1_norms,
hypre_ParVector *u,
hypre_ParVector *Vtemp,
hypre_ParVector *Ztemp )
{
const HYPRE_Int skip_diag = relax_weight == 1.0 && omega == 1.0 ? 0 : 1;

return hypre_BoomerAMGRelaxHybridSOR(A, f, cf_marker, relax_points, relax_weight,
omega, l1_norms, u, Vtemp, Ztemp, 1, 1, skip_diag, 0);
}

/* l1 symmetric hybrid G-S */
HYPRE_Int
hypre_BoomerAMGRelax89HybridL1SSOR( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
HYPRE_Int *cf_marker,
HYPRE_Int relax_points,
HYPRE_Real relax_weight,
HYPRE_Real omega,
HYPRE_Real *l1_norms,
hypre_ParVector *u,
hypre_ParVector *Vtemp,
hypre_ParVector *Ztemp )
{
hypre_BoomerAMGRelax13HybridL1GaussSeidel(A, f, cf_marker, relax_points, relax_weight,
omega, l1_norms, u, Vtemp, Ztemp);

hypre_BoomerAMGRelax14HybridL1GaussSeidel(A, f, cf_marker, relax_points, relax_weight,
omega, l1_norms, u, Vtemp, Ztemp);

return hypre_error_flag;
}

/* forward hybrid topology ordered G-S */
HYPRE_Int
hypre_BoomerAMGRelax10TopoOrderedGaussSeidel( hypre_ParCSRMatrix *A,
Expand Down Expand Up @@ -1357,6 +1386,7 @@ hypre_BoomerAMGRelax19GaussElim( hypre_ParCSRMatrix *A,
return relax_error;
}


HYPRE_Int
hypre_BoomerAMGRelax98GaussElimPivot( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
Expand Down
Loading

0 comments on commit 36ab29b

Please sign in to comment.