From 423aa60d16eaec412171a0b22ba959bb875887c0 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 15 May 2024 17:10:43 -0600 Subject: [PATCH 1/3] Tpetra: restore imbalanced row path Bring back the tpetra imbalanced row heuristic for spmv. This was removed in #12852 but it is still needed for CUDA < 11.2.1. --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 06522d5b68e9..febf5fc28ddd 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -5087,9 +5087,24 @@ CrsMatrix:: #if KOKKOSKERNELS_VERSION >= 40299 auto A_lcl = getLocalMatrixDevice(); + if(!applyHelper.get()) { // The apply helper does not exist, so create it. - applyHelper = std::make_shared(A_lcl.nnz(), A_lcl.graph.row_map); + // Decide now whether to use the imbalanced row path, or the default. + bool useMergePath = false; + LocalOrdinal nrows = getLocalNumRows(); + LocalOrdinal maxRowImbalance = 0; + if(nrows != 0) + maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows); + //TODO: when https://github.com/kokkos/kokkos-kernels/issues/2166 is fixed and, + //we can use SPMV_MERGE_PATH for the native spmv as well. + //Take out this ifdef to enable that. +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + if(size_t(maxRowImbalance) >= Tpetra::Details::Behavior::rowImbalanceThreshold()) + useMergePath = true; +#endif + applyHelper = std::make_shared(A_lcl.nnz(), A_lcl.graph.row_map, + useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT); } // Translate mode (Teuchos enum) to KokkosKernels (1-character string) From 906d7be642e81e44d36a27880348ce7af8df27be Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 16 May 2024 16:53:59 -0600 Subject: [PATCH 2/3] kokkos-kernels: patch in spmv regression fixes --- .../sparse/impl/KokkosSparse_spmv_spec.hpp | 53 +++------------ .../sparse/src/KokkosSparse_spmv.hpp | 48 +++---------- .../sparse/src/KokkosSparse_spmv_handle.hpp | 9 +-- ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 47 ++++++------- .../KokkosSparse_spmv_mv_tpl_spec_decl.hpp | 12 ++-- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 68 +++++++++++-------- 6 files changed, 88 insertions(+), 149 deletions(-) diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp index da02b1af5abe..3405ae7575d2 100644 --- a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -208,49 +208,18 @@ struct SPMV_MV KAT; - // Intercept special case: if x/y have only 1 column and both are - // contiguous, use the more efficient single-vector impl. - // - // We cannot do this if x or y is noncontiguous, because the column subview - // must be LayoutStride which is not ETI'd. - // - // Do not use a TPL even if one is available for the types: - // we don't want the same handle being used in both TPL and non-TPL versions - if (x.extent(1) == size_t(1) && x.span_is_contiguous() && - y.span_is_contiguous()) { - Kokkos::View - x0(x.data(), x.extent(0)); - Kokkos::View - y0(y.data(), y.extent(0)); - if (beta == KAT::zero()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else if (beta == KAT::one()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else if (beta == -KAT::one()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } + if (alpha == KAT::zero()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); + } else if (alpha == KAT::one()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); + } else if (alpha == -KAT::one()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); } else { - if (alpha == KAT::zero()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else if (alpha == KAT::one()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else if (alpha == -KAT::one()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); } } }; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp index f11b61f675e6..336bae4f1d30 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv.hpp @@ -40,31 +40,6 @@ struct RANK_ONE {}; struct RANK_TWO {}; } // namespace -namespace Impl { -template -inline constexpr bool spmv_general_tpl_avail() { - constexpr bool isBSR = ::KokkosSparse::Experimental::is_bsr_matrix_v; - if constexpr (!isBSR) { - // CRS - if constexpr (XVector::rank() == 1) - return spmv_tpl_spec_avail::value; - else - return spmv_mv_tpl_spec_avail::value; - } else { - // BSR - if constexpr (XVector::rank() == 1) - return spmv_bsrmatrix_tpl_spec_avail::value; - else - return spmv_mv_bsrmatrix_tpl_spec_avail::value; - } -} -} // namespace Impl - // clang-format off /// \brief Kokkos sparse matrix-vector multiply. /// Computes y := alpha*Op(A)*x + beta*y, where Op(A) is @@ -247,8 +222,8 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], typename YVector::device_type, Kokkos::MemoryTraits>; // Special case: XVector/YVector are rank-2 but x,y both have one column and - // are contiguous. If a TPL is available for rank-1 vectors but not rank-2, - // take rank-1 subviews of x,y and call the rank-1 version. + // are contiguous. In this case take rank-1 subviews of x,y and call the + // rank-1 version. if constexpr (XVector::rank() == 2) { using XVector_SubInternal = Kokkos::View< typename XVector::const_value_type*, @@ -259,19 +234,12 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], typename YVector::non_const_value_type*, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename YVector::device_type, Kokkos::MemoryTraits>; - if constexpr (!Impl::spmv_general_tpl_avail< - ExecutionSpace, HandleImpl, AMatrix_Internal, - XVector_Internal, YVector_Internal>() && - Impl::spmv_general_tpl_avail< - ExecutionSpace, HandleImpl, AMatrix_Internal, - XVector_SubInternal, YVector_SubInternal>()) { - if (x.extent(1) == size_t(1) && x.span_is_contiguous() && - y.span_is_contiguous()) { - XVector_SubInternal xsub(x.data(), x.extent(0)); - YVector_SubInternal ysub(y.data(), y.extent(0)); - spmv(space, handle->get_impl(), mode, alpha, A, xsub, beta, ysub); - return; - } + if (x.extent(1) == size_t(1) && x.span_is_contiguous() && + y.span_is_contiguous()) { + XVector_SubInternal xsub(x.data(), x.extent(0)); + YVector_SubInternal ysub(y.data(), y.extent(0)); + spmv(space, handle->get_impl(), mode, alpha, A, xsub, beta, ysub); + return; } } diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp index a2eecfd1ce3a..d930cbdfbe35 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp @@ -235,7 +235,8 @@ struct SPMVHandleImpl { "SPMVHandleImpl: Ordinal must not be a const type"); SPMVHandleImpl(SPMVAlgorithm algo_) : algo(algo_) {} ~SPMVHandleImpl() { - if (tpl) delete tpl; + if (tpl_rank1) delete tpl_rank1; + if (tpl_rank2) delete tpl_rank2; } ImplType* get_impl() { return this; } @@ -243,9 +244,9 @@ struct SPMVHandleImpl { /// Get the SPMVAlgorithm used by this handle SPMVAlgorithm get_algorithm() const { return this->algo; } - bool is_set_up = false; - const SPMVAlgorithm algo = SPMV_DEFAULT; - TPL_SpMV_Data* tpl = nullptr; + const SPMVAlgorithm algo = SPMV_DEFAULT; + TPL_SpMV_Data* tpl_rank1 = nullptr; + TPL_SpMV_Data* tpl_rank2 = nullptr; // Expert tuning parameters for native SpMV // TODO: expose a proper Experimental interface to set these. Currently they // can be assigned directly in the SPMVHandle as they are public members. diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index e867038842d9..5e6fb1fa09b8 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -43,8 +43,8 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); @@ -54,7 +54,7 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank1 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -87,7 +87,6 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); @@ -124,8 +123,8 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast(handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); @@ -135,7 +134,7 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank2 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -168,7 +167,6 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); @@ -390,23 +388,22 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -518,23 +515,22 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast( + handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank2 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -886,16 +882,16 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, rocsparse_value_type* y_ = reinterpret_cast(y.data()); KokkosSparse::Impl::RocSparse_BSR_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for rocsparse BSR"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::RocSparse_BSR_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::RocSparse_BSR_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( rocsparse_create_mat_descr(&subhandle->mat)); // *_ex* functions deprecated in introduced in 6+ @@ -949,7 +945,6 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, "unsupported value type for rocsparse_*bsrmv"); } #endif - handle->is_set_up = true; } // *_ex* functions deprecated in introduced in 6+ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index 2ae6bf44f252..853b93f47e7c 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -186,16 +186,16 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, } KokkosSparse::Impl::CuSparse10_SpMV_Data *subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast( + handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); + handle->tpl_rank2 = subhandle; /* create matrix */ KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), @@ -209,8 +209,6 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); - - handle->is_set_up = true; } KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index e3f88e6e1122..bdde1d831492 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -96,25 +96,38 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( &vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); - // use default cusparse algo for best performance + // Prior to CUDA 11.2.1, ALG2 was more performant than default for imbalanced + // matrices. After 11.2.1, the default is performant for imbalanced matrices, + // and ALG2 now means something else. CUDA >= 11.2.1 corresponds to + // CUSPARSE_VERSION >= 11402. +#if CUSPARSE_VERSION >= 11402 + const bool useAlg2 = false; +#else + const bool useAlg2 = handle->get_algorithm() == SPMV_MERGE_PATH; +#endif + + // In CUDA 11.2.0, the algorithm enums were renamed. + // This corresponds to CUSPARSE_VERSION >= 11400. #if CUSPARSE_VERSION >= 11400 - cusparseSpMVAlg_t algo = CUSPARSE_SPMV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = + useAlg2 ? CUSPARSE_SPMV_CSR_ALG2 : CUSPARSE_SPMV_ALG_DEFAULT; #else - cusparseSpMVAlg_t algo = CUSPARSE_MV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = + useAlg2 ? CUSPARSE_CSRMV_ALG2 : CUSPARSE_MV_ALG_DEFAULT; #endif KokkosSparse::Impl::CuSparse10_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; /* create matrix */ KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( @@ -135,7 +148,6 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif - handle->is_set_up = true; } /* perform SpMV */ @@ -150,24 +162,23 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } /* perform the actual SpMV operation */ @@ -419,16 +430,16 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], rocsparse_spmv_alg alg = rocsparse_spmv_alg_default; KokkosSparse::Impl::RocSparse_CRS_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for rocsparse CRS"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; /* Create the rocsparse csr descr */ // We need to do some casting to void* // Note that row_map is always a const view so const_cast is necessary, @@ -476,7 +487,6 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], KOKKOS_IMPL_HIP_SAFE_CALL( hipMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif - handle->is_set_up = true; } /* Perform the actual computation */ @@ -592,8 +602,8 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL CRS"); @@ -603,7 +613,7 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank1 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -632,7 +642,6 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosToMKLScalar(beta); @@ -757,15 +766,15 @@ inline void spmv_onemkl(const execution_space& exec, Handle* handle, mkl_mode = oneapi::mkl::transpose::trans; OneMKL_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for OneMKL CRS"); subhandle->set_exec_space(exec); } else { - subhandle = new OneMKL_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new OneMKL_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; oneapi::mkl::sparse::init_matrix_handle(&subhandle->mat); // Even for out-of-order SYCL queue, the inputs here do not depend on // kernels being sequenced @@ -780,7 +789,6 @@ inline void spmv_onemkl(const execution_space& exec, Handle* handle, // optimize_gemv has finished oneapi::mkl::sparse::optimize_gemv(exec.sycl_queue(), mkl_mode, subhandle->mat, {ev}); - handle->is_set_up = true; } // Uncommon case: an out-of-order SYCL queue does not promise that previously From 1bddb553d2151f49f70d7f23ce11ceeca55cbe11 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 22 May 2024 07:29:57 -0600 Subject: [PATCH 3/3] Fix unused param warning --- packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp index 3405ae7575d2..67a2f0563962 100644 --- a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -203,7 +203,8 @@ struct SPMV_MV { typedef typename YVector::non_const_value_type coefficient_type; - static void spmv_mv(const ExecutionSpace& space, Handle* handle, + // TODO: pass handle through to implementation and use tuning parameters + static void spmv_mv(const ExecutionSpace& space, Handle* /* handle */, const char mode[], const coefficient_type& alpha, const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) {