From 5cf14ddbe0f5a2983abb0396657c2f6b19ab242e Mon Sep 17 00:00:00 2001 From: malphil Date: Mon, 6 May 2024 08:51:47 -0600 Subject: [PATCH 01/13] Add ability to use preconditioned sub-solves in Teko NS SIMPLE --- .../NS/Teko_SIMPLEPreconditionerFactory.cpp | 124 +++++++++++++++--- .../NS/Teko_SIMPLEPreconditionerFactory.hpp | 2 + 2 files changed, 105 insertions(+), 21 deletions(-) diff --git a/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.cpp b/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.cpp index f0c202d900ec..6cc9ec14fbdf 100644 --- a/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.cpp +++ b/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.cpp @@ -184,19 +184,59 @@ LinearOp SIMPLEPreconditionerFactory ::buildPreconditionerOperator( hatS = add(C, scale(-1.0, multiply(B, HBt))); } + Teko::ModifiableLinearOp& precInvF = state.getModifiableOp("precInvF"); + if(precVelFactory_){ + if(precInvF == Teuchos::null){ + precInvF = precVelFactory_->buildInverse(F); + state.addModifiableOp("precInvF", precInvF); + } else { + Teko::rebuildInverse(*precVelFactory_, F, precInvF); + } + } + // build the inverse for F - ModifiableLinearOp& invF = state.getModifiableOp("invF"); - if (invF == Teuchos::null) - invF = buildInverse(*invVelFactory_, F); - else - rebuildInverse(*invVelFactory_, F, invF); + Teko::ModifiableLinearOp& invF = state.getModifiableOp("invF"); + if (invF == Teuchos::null){ + if(precInvF.is_null()){ + invF = Teko::buildInverse(*invVelFactory_, F); + } else { + invF = Teko::buildInverse(*invVelFactory_, F, precInvF); + } + } + else{ + if(precInvF.is_null()){ + Teko::rebuildInverse(*invVelFactory_, F, invF); + } else { + Teko::rebuildInverse(*invVelFactory_, F, precInvF, invF); + } + } + + Teko::ModifiableLinearOp& precInvS = state.getModifiableOp("precInvS"); + if(precPrsFactory_){ + if(precInvS == Teuchos::null){ + precInvS = precPrsFactory_->buildInverse(hatS); + state.addModifiableOp("precInvS", precInvS); + } else { + Teko::rebuildInverse(*precPrsFactory_, hatS, precInvS); + } + } // build the approximate Schur complement - ModifiableLinearOp& invS = state.getModifiableOp("invS"); - if (invS == Teuchos::null) - invS = buildInverse(*invPrsFactory_, hatS); - else - rebuildInverse(*invPrsFactory_, hatS, invS); + Teko::ModifiableLinearOp& invS = state.getModifiableOp("invS"); + if (invS == Teuchos::null){ + if(precInvS == Teuchos::null){ + invS = Teko::buildInverse(*invPrsFactory_, hatS); + } else { + invS = Teko::buildInverse(*invPrsFactory_, hatS, precInvS); + } + } + else { + if(precInvS == Teuchos::null){ + Teko::rebuildInverse(*invPrsFactory_, hatS, invS); + } else { + Teko::rebuildInverse(*invPrsFactory_, hatS, precInvS, invS); + } + } std::vector invDiag(2); // vector storing inverses @@ -232,15 +272,19 @@ void SIMPLEPreconditionerFactory::initializeFromParameterList(const Teuchos::Par fInverseType_ = Diagonal; // get string specifying inverse - std::string invStr = "", invVStr = "", invPStr = ""; + std::string invStr = "", invVStr = "", invPStr = "", precVStr = "", precPStr = ""; alpha_ = 1.0; // "parse" the parameter list if (pl.isParameter("Inverse Type")) invStr = pl.get("Inverse Type"); if (pl.isParameter("Inverse Velocity Type")) invVStr = pl.get("Inverse Velocity Type"); + if (pl.isParameter("Preconditioner Velocity Type")) + precVStr = pl.get("Preconditioner Velocity Type"); if (pl.isParameter("Inverse Pressure Type")) invPStr = pl.get("Inverse Pressure Type"); + if (pl.isParameter("Preconditioner Pressure Type")) + precPStr = pl.get("Preconditioner Pressure Type"); if (pl.isParameter("Alpha")) alpha_ = pl.get("Alpha"); if (pl.isParameter("Explicit Velocity Inverse Type")) { std::string fInverseStr = pl.get("Explicit Velocity Inverse Type"); @@ -257,7 +301,9 @@ void SIMPLEPreconditionerFactory::initializeFromParameterList(const Teuchos::Par Teko_DEBUG_MSG_BEGIN(5) DEBUG_STREAM << "SIMPLE Parameters: " << std::endl; DEBUG_STREAM << " inv type = \"" << invStr << "\"" << std::endl; DEBUG_STREAM << " inv v type = \"" << invVStr << "\"" << std::endl; + DEBUG_STREAM << " prec v type = \"" << precVStr << "\"" << std::endl; DEBUG_STREAM << " inv p type = \"" << invPStr << "\"" << std::endl; + DEBUG_STREAM << " prec p type = \"" << precPStr << "\"" << std::endl; DEBUG_STREAM << " alpha = " << alpha_ << std::endl; DEBUG_STREAM << " use mass = " << useMass_ << std::endl; DEBUG_STREAM << " vel scaling = " << getDiagonalName(fInverseType_) << std::endl; @@ -284,10 +330,20 @@ void SIMPLEPreconditionerFactory::initializeFromParameterList(const Teuchos::Par if (invVStr != invPStr) // if different, build pressure inverse factory invPFact = invLib->getInverseFactory(invPStr); + RCP precVFact, precPFact; + if(precVStr != "") + precVFact = invLib->getInverseFactory(precVStr); + + if(precPStr != "") + precPFact = invLib->getInverseFactory(precPStr); + // based on parameter type build a strategy invVelFactory_ = invVFact; invPrsFactory_ = invPFact; + precVelFactory_ = precVFact; + precPrsFactory_ = precPFact; + if (useMass_) { Teuchos::RCP rh = getRequestHandler(); rh->preRequest(Teko::RequestMesg("Velocity Mass Matrix")); @@ -302,19 +358,41 @@ Teuchos::RCP SIMPLEPreconditionerFactory::getRequestedPa Teuchos::RCP pl = rcp(new Teuchos::ParameterList()); // grab parameters from F solver - RCP vList = invVelFactory_->getRequestedParameters(); - if (vList != Teuchos::null) { - Teuchos::ParameterList::ConstIterator itr; - for (itr = vList->begin(); itr != vList->end(); ++itr) pl->setEntry(itr->first, itr->second); - result = pl; + { + RCP vList = invVelFactory_->getRequestedParameters(); + if (vList != Teuchos::null) { + Teuchos::ParameterList::ConstIterator itr; + for (itr = vList->begin(); itr != vList->end(); ++itr) pl->setEntry(itr->first, itr->second); + result = pl; + } + } + + if (precVelFactory_ != Teuchos::null) { + RCP vList = precVelFactory_->getRequestedParameters(); + if (vList != Teuchos::null) { + Teuchos::ParameterList::ConstIterator itr; + for (itr = vList->begin(); itr != vList->end(); ++itr) pl->setEntry(itr->first, itr->second); + result = pl; + } } // grab parameters from S solver - RCP pList = invPrsFactory_->getRequestedParameters(); - if (pList != Teuchos::null) { - Teuchos::ParameterList::ConstIterator itr; - for (itr = pList->begin(); itr != pList->end(); ++itr) pl->setEntry(itr->first, itr->second); - result = pl; + { + RCP pList = invPrsFactory_->getRequestedParameters(); + if (pList != Teuchos::null) { + Teuchos::ParameterList::ConstIterator itr; + for (itr = pList->begin(); itr != pList->end(); ++itr) pl->setEntry(itr->first, itr->second); + result = pl; + } + } + + if (precPrsFactory_ != Teuchos::null) { + RCP pList = precPrsFactory_->getRequestedParameters(); + if (pList != Teuchos::null) { + Teuchos::ParameterList::ConstIterator itr; + for (itr = pList->begin(); itr != pList->end(); ++itr) pl->setEntry(itr->first, itr->second); + result = pl; + } } // grab parameters from S solver @@ -338,6 +416,10 @@ bool SIMPLEPreconditionerFactory::updateRequestedParameters(const Teuchos::Param // update requested parameters in solvers result &= invVelFactory_->updateRequestedParameters(pl); result &= invPrsFactory_->updateRequestedParameters(pl); + if(precVelFactory_) + result &= precVelFactory_->updateRequestedParameters(pl); + if(precPrsFactory_) + result &= precPrsFactory_->updateRequestedParameters(pl); if (customHFactory_ != Teuchos::null) result &= customHFactory_->updateRequestedParameters(pl); return result; diff --git a/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.hpp b/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.hpp index ce0f4c87cf5d..62f0f4fd6a5d 100644 --- a/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.hpp +++ b/packages/teko/src/NS/Teko_SIMPLEPreconditionerFactory.hpp @@ -103,7 +103,9 @@ class SIMPLEPreconditionerFactory : public BlockPreconditionerFactory { // class members Teuchos::RCP customHFactory_; Teuchos::RCP invVelFactory_; + Teuchos::RCP precVelFactory_; Teuchos::RCP invPrsFactory_; + Teuchos::RCP precPrsFactory_; double alpha_; DiagonalType fInverseType_; // enum FInverseType {Diagonal,Lumped,AbsRowSum,Custom} fInverseType_; From 054d8fea52346f506efe0ccb1388309288c41eb9 Mon Sep 17 00:00:00 2001 From: malphil Date: Mon, 6 May 2024 10:37:24 -0600 Subject: [PATCH 02/13] Add test coverage for setting up sub-solves in NS SIMPLE --- .../tSIMPLEPreconditionerFactory_tpetra.cpp | 36 +++++++++++++++++++ .../tSIMPLEPreconditionerFactory_tpetra.hpp | 1 + 2 files changed, 37 insertions(+) diff --git a/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.cpp b/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.cpp index 922d3f0e6e4a..5a78cc879383 100644 --- a/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.cpp +++ b/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.cpp @@ -272,6 +272,13 @@ int tSIMPLEPreconditionerFactory_tpetra::runTest(int verbosity, std::ostream &st failcount += status ? 0 : 1; totalrun++; + status = test_iterativeSolves(verbosity, failstrm); + Teko_TEST_MSG_tpetra(stdstrm, 1, " \"iterativeSolves\" ... PASSED", + " \"iterativeSolves\" ... FAILED"); + allTests &= status; + failcount += status ? 0 : 1; + totalrun++; + status = test_diagonal(verbosity, failstrm, 0); Teko_TEST_MSG_tpetra(stdstrm, 1, " \"diagonal(diag)\" ... PASSED", " \"diagonal(diag)\" ... FAILED"); @@ -716,5 +723,34 @@ bool tSIMPLEPreconditionerFactory_tpetra::test_result(int verbosity, std::ostrea return true; } +bool tSIMPLEPreconditionerFactory_tpetra::test_iterativeSolves(int verbosity, std::ostream& os) +{ + bool status = false; + bool allPassed = true; + + RCP params = Teuchos::rcp(new ParameterList()); + ParameterList& tekoList = params->sublist("Preconditioner Types").sublist("Teko"); + tekoList.set("Inverse Type", "SIMPLE"); + ParameterList& ifl = tekoList.sublist("Inverse Factory Library"); + ifl.sublist("SIMPLE").set("Type", "NS SIMPLE"); + ifl.sublist("SIMPLE").set("Inverse Velocity Type", "Belos"); + ifl.sublist("SIMPLE").set("Preconditioner Velocity Type", "Ifpack2"); + ifl.sublist("SIMPLE").set("Inverse Pressure Type", "Belos"); + ifl.sublist("SIMPLE").set("Preconditioner Pressure Type", "Ifpack2"); + + RCP invLib = Teko::InverseLibrary::buildFromParameterList(ifl); + RCP invFact = invLib->getInverseFactory("SIMPLE"); + + Teko::ModifiableLinearOp inv = Teko::buildInverse(*invFact, A_); + TEST_ASSERT(!inv.is_null(), "Constructed preconditioner is null"); + + if (!inv.is_null()) { + Teko::rebuildInverse(*invFact, A_, inv); + TEST_ASSERT(!inv.is_null(), "Constructed preconditioner is null"); + } + + return true; +} + } // end namespace Test } // end namespace Teko diff --git a/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.hpp b/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.hpp index fc3efe33cd69..c31a423bf3ec 100644 --- a/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.hpp +++ b/packages/teko/tests/src/tSIMPLEPreconditionerFactory_tpetra.hpp @@ -72,6 +72,7 @@ class tSIMPLEPreconditionerFactory_tpetra : public UnitTest { bool test_initializePrec(int verbosity, std::ostream& os, int use_blocking); bool test_uninitializePrec(int verbosity, std::ostream& os); bool test_isCompatable(int verbosity, std::ostream& os); + bool test_iterativeSolves(int verbosity, std::ostream& os); // non-member tests bool test_result(int verbosity, std::ostream& os, int use_blocking); From 04ccefd84f729b241d0efad5c6e9095997bd8d18 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 May 2024 13:40:14 -0600 Subject: [PATCH 03/13] Tpetra: use non-fused residual if TPLs available set behavior variable TPETRA_FUSED_RESIDUAL to OFF by default if a sparse TPL (cusparse, rocsparse, or MKL) is enabled. This improves performance of Tpetra::Details::residual compared to the Tpetra implementation of the fused residual. --- packages/tpetra/core/src/Tpetra_Details_Behavior.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp b/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp index b239d0569b9e..a67925d8f1cd 100644 --- a/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp +++ b/packages/tpetra/core/src/Tpetra_Details_Behavior.cpp @@ -54,6 +54,7 @@ #include "Teuchos_TestForException.hpp" #include "TpetraCore_config.h" #include "Tpetra_Details_Behavior.hpp" +#include "KokkosKernels_config.h" // for TPL enable macros /*! \file Tpetra_Details_Behavior.cpp @@ -681,7 +682,13 @@ bool Behavior::skipCopyAndPermuteIfPossible() { } bool Behavior::fusedResidual() { +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + constexpr bool defaultValue(false); +#else constexpr bool defaultValue(true); +#endif static bool value_ = defaultValue; static bool initialized_ = false; From 242eed746241f65d1aa93fbd74554be2243afbe5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 22:58:02 +0000 Subject: [PATCH 04/13] Bump actions/dependency-review-action from 4.3.1 to 4.3.2 Bumps [actions/dependency-review-action](https://github.com/actions/dependency-review-action) from 4.3.1 to 4.3.2. - [Release notes](https://github.com/actions/dependency-review-action/releases) - [Commits](https://github.com/actions/dependency-review-action/compare/e58c696e52cac8e62d61cc21fda89565d71505d7...0c155c5e8556a497adf53f2c18edabf945ed8e70) --- updated-dependencies: - dependency-name: actions/dependency-review-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dependency-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index ac8eff6e6b63..35288cf3603d 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -24,4 +24,4 @@ jobs: - name: 'Checkout Repository' uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: 'Dependency Review' - uses: actions/dependency-review-action@e58c696e52cac8e62d61cc21fda89565d71505d7 # v4.3.1 + uses: actions/dependency-review-action@0c155c5e8556a497adf53f2c18edabf945ed8e70 # v4.3.2 From 56e598247db4ebb28dde0820dc9af95af10e7b4c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 May 2024 00:36:52 +0000 Subject: [PATCH 05/13] Bump actions/checkout from 4.1.4 to 4.1.5 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.4 to 4.1.5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/0ad4b8fadaa221de15dcec353f45205ec38ea70b...44c2b7a8a4ea60a981eaca3cf939b5f4305c123b) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/AT2.yml | 4 ++-- .github/workflows/clang_format.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/dependency-review.yml | 2 +- .github/workflows/detect-git-lfs.yml | 2 +- .github/workflows/detect-mpi-comm-world.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/spack.yml | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index df2699da2911..0ff67a7f9582 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -41,7 +41,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: fetch-depth: 0 - name: Repo status @@ -99,7 +99,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: fetch-depth: 0 - name: Repo status diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 2b0db47934e2..dc9acf5c1af6 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - uses: DoozyX/clang-format-lint-action@11b773b1598aa4ae3b32f023701bca5201c3817d # v0.17 with: source: './packages/muelu ./packages/tempus ./packages/teko ./packages/xpetra' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 36b81d5546b8..1cc943b94e91 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -58,7 +58,7 @@ jobs: # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 35288cf3603d..eea3e827b911 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -22,6 +22,6 @@ jobs: egress-policy: audit - name: 'Checkout Repository' - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 - name: 'Dependency Review' uses: actions/dependency-review-action@0c155c5e8556a497adf53f2c18edabf945ed8e70 # v4.3.2 diff --git a/.github/workflows/detect-git-lfs.yml b/.github/workflows/detect-git-lfs.yml index 06cd90e99b11..6feb55942e72 100644 --- a/.github/workflows/detect-git-lfs.yml +++ b/.github/workflows/detect-git-lfs.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: fetch-depth: 0 diff --git a/.github/workflows/detect-mpi-comm-world.yml b/.github/workflows/detect-mpi-comm-world.yml index 10aa15be20e2..d5c8413e022c 100644 --- a/.github/workflows/detect-mpi-comm-world.yml +++ b/.github/workflows/detect-mpi-comm-world.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: fetch-depth: 0 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 08b048658f7f..c467a2b7d999 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -31,7 +31,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: persist-credentials: false diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index 9791ed876496..d9a991c6968c 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -24,7 +24,7 @@ jobs: with: access_token: ${{ github.token }} - name: Clone Trilinos - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: fetch-depth: 1 - name: Spack build From a353ecd7efcb8ff52ab1b246d06449f545b830b7 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 7 May 2024 11:17:52 -0600 Subject: [PATCH 06/13] Snapshot of kokkos.git from commit c80cdafef30d02b00577c1dbbae2e925314f45a5 From repository at git@github.com:kokkos/kokkos.git At commit: commit c80cdafef30d02b00577c1dbbae2e925314f45a5 Author: Nathan Ellingwood Date: Tue May 7 11:15:43 2024 -0600 update master_history.txt Signed-off-by: Nathan Ellingwood --- packages/kokkos/CHANGELOG.md | 17 ++++++- packages/kokkos/CMakeLists.txt | 2 +- packages/kokkos/Copyright.txt | 49 +++---------------- packages/kokkos/LICENSE | 10 ---- packages/kokkos/Makefile.kokkos | 34 ++++++------- .../src/sorting/impl/Kokkos_SortImpl.hpp | 5 ++ packages/kokkos/bin/nvcc_wrapper | 2 +- packages/kokkos/cmake/KokkosCore_config.h.in | 1 + .../kokkos/cmake/kokkos_enable_devices.cmake | 37 +++++++------- .../kokkos/cmake/kokkos_enable_options.cmake | 1 + .../src/Cuda/Kokkos_Cuda_Parallel_Range.hpp | 5 +- packages/kokkos/core/src/HIP/Kokkos_HIP.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.hpp | 4 ++ .../kokkos/core/src/HIP/Kokkos_HIP_Space.hpp | 19 ++++++- .../Serial/Kokkos_Serial_Parallel_Team.hpp | 12 ++--- .../unit_test/TestExecSpacePartitioning.hpp | 14 ++++++ packages/kokkos/core/unit_test/TestReduce.hpp | 26 ++++++++++ .../core/unit_test/hip/TestHIP_Spaces.cpp | 16 ++++++ packages/kokkos/master_history.txt | 1 + 20 files changed, 165 insertions(+), 98 deletions(-) diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 244165f05709..4fbc9002973e 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,18 @@ # CHANGELOG +## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) + +### Backend and Architecture Enhancements: + +#### HIP: +* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) + +### Bug Fixes +* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) +* `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931) +* Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578) + ## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) @@ -39,7 +52,7 @@ * Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772) #### Threads: -* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6446) +* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601) #### OpenMP: * Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573) @@ -98,7 +111,7 @@ * HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777) * Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786) * Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821) -* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892) +* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892) ## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index 93a796f200b5..76f2183db8ac 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -151,7 +151,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 3) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Copyright.txt b/packages/kokkos/Copyright.txt index 5e2f8d8647b5..cbba3efc7bc5 100644 --- a/packages/kokkos/Copyright.txt +++ b/packages/kokkos/Copyright.txt @@ -1,41 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER +************************************************************************ + + Kokkos v. 4.0 + Copyright (2022) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. diff --git a/packages/kokkos/LICENSE b/packages/kokkos/LICENSE index 6572cc2db055..4d9d69d7c445 100644 --- a/packages/kokkos/LICENSE +++ b/packages/kokkos/LICENSE @@ -1,13 +1,3 @@ - ************************************************************************ - - Kokkos v. 4.0 - Copyright (2022) National Technology & Engineering - Solutions of Sandia, LLC (NTESS). - - Under the terms of Contract DE-NA0003525 with NTESS, - the U.S. Government retains certain rights in this software. - - ============================================================================== Kokkos is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 2c74dd77bfb7..d9be7901a382 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 3 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -1222,6 +1222,22 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") @@ -1241,26 +1257,10 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) diff --git a/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index 4c174b5fda94..08946228919b 100644 --- a/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/packages/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -399,9 +399,14 @@ sort_device_view_with_comparator( using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; +// Note with HIP unified memory this code path is still the right thing to do +// if we end up here when RocThrust is not enabled. +// The create_mirror_view_and_copy will do the right thing (no copy). +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY static_assert(!SpaceAccessibility::accessible, "Impl::sort_device_view_with_comparator: should not be called " "on a view that is already accessible on the host"); +#endif copy_to_host_run_stdsort_copy_back(exec, view, comparator); } diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index 9b935835d5ff..dbfef2267fec 100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -229,7 +229,7 @@ do fi ;; #Handle known nvcc args - --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|--fmad=*|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in index 2df0f6c52054..3ab39cd6abf0 100644 --- a/packages/kokkos/cmake/KokkosCore_config.h.in +++ b/packages/kokkos/cmake/KokkosCore_config.h.in @@ -39,6 +39,7 @@ #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS +#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK diff --git a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake index 9a977520a3a0..c7d189285c58 100644 --- a/packages/kokkos/cmake/kokkos_enable_devices.cmake +++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake @@ -40,6 +40,26 @@ ELSE() ENDIF() KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") + +# We want this to default to OFF for cache reasons, but if no +# host space is given, then activate serial +IF (KOKKOS_HAS_TRILINOS) + #However, Trilinos always wants Serial ON + SET(SERIAL_DEFAULT ON) +ELSEIF (KOKKOS_HAS_HOST) + SET(SERIAL_DEFAULT OFF) +ELSE() + SET(SERIAL_DEFAULT ON) + IF (NOT DEFINED Kokkos_ENABLE_SERIAL) + MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") + ENDIF() +ENDIF() +KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") + +KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") + +# Device backends have to come after host backends for header include order reasons +# Without this we can't make e.g. CudaSpace accessible by HostSpace KOKKOS_DEVICE_OPTION(OPENACC OFF DEVICE "Whether to build the OpenACC backend") IF (KOKKOS_ENABLE_OPENACC) COMPILER_SPECIFIC_FLAGS( @@ -90,23 +110,6 @@ IF (KOKKOS_ENABLE_CUDA) LIST(APPEND DEVICE_SETUP_LIST Cuda) ENDIF() -# We want this to default to OFF for cache reasons, but if no -# host space is given, then activate serial -IF (KOKKOS_HAS_TRILINOS) - #However, Trilinos always wants Serial ON - SET(SERIAL_DEFAULT ON) -ELSEIF (KOKKOS_HAS_HOST) - SET(SERIAL_DEFAULT OFF) -ELSE() - SET(SERIAL_DEFAULT ON) - IF (NOT DEFINED Kokkos_ENABLE_SERIAL) - MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") - ENDIF() -ENDIF() -KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") - -KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") - KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend") ## HIP has extra setup requirements, turn on Kokkos_Setup_HIP.hpp in macros diff --git a/packages/kokkos/cmake/kokkos_enable_options.cmake b/packages/kokkos/cmake/kokkos_enable_options.cmake index a437f6132aa9..32788e7aa0f6 100644 --- a/packages/kokkos/cmake/kokkos_enable_options.cmake +++ b/packages/kokkos/cmake/kokkos_enable_options.cmake @@ -70,6 +70,7 @@ KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tu KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops") KOKKOS_ENABLE_OPTION(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support") KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time") +KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for HIP") # This option will go away eventually, but allows fallback to old implementation when needed. KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp index 0f052be3c307..334834938a17 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp @@ -312,8 +312,9 @@ class ParallelReduce, // REQUIRED ( 1 , N , 1 ) dim3 block(1, block_size, 1); // Required grid.x <= block.y - dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, - 1); + dim3 grid(std::min(index_type(block.y), + index_type((nwork + block.y - 1) / block.y)), + 1, 1); // TODO @graph We need to effectively insert this in to the graph const int shmem = diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP.cpp index 309e07fb3fbb..aced2083ffb5 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP.cpp @@ -146,6 +146,10 @@ void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const { #else os << "no\n"; #endif +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + os << " KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY: "; + os << "yes\n"; +#endif os << "\nRuntime Configuration:\n"; diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp index ab24004f5fcb..83f829fddae3 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp @@ -23,8 +23,12 @@ #include #include +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPSpace); +#else +KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp index fbae51883448..1ca7bd5cd0e6 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp @@ -20,8 +20,12 @@ #include #include +#if defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPSpace); +#else KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION( Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPManagedSpace); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp index 7f2004e5cbc6..e1b4768b8771 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp @@ -65,6 +65,18 @@ class HIPSpace { ~HIPSpace() = default; /**\brief Allocate untracked memory in the hip space */ +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + template + void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const { + return allocate(arg_alloc_size); + } + template + void* allocate(const ExecutionSpace&, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const { + return allocate(arg_label, arg_alloc_size, arg_logical_size); + } +#else // FIXME_HIP Use execution space instance void* allocate(const HIP&, const size_t arg_alloc_size) const { return allocate(arg_alloc_size); @@ -74,6 +86,7 @@ class HIPSpace { const size_t arg_logical_size = 0) const { return allocate(arg_label, arg_alloc_size, arg_logical_size); } +#endif void* allocate(const size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -267,7 +280,11 @@ static_assert(Kokkos::Impl::MemorySpaceAccess::assignable); template <> struct MemorySpaceAccess { enum : bool { assignable = false }; - enum : bool { accessible = false }; +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) + enum : bool{accessible = false}; +#else + enum : bool { accessible = true }; +#endif enum : bool { deepcopy = true }; }; diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp index f34a7daaca00..a25b51496eff 100644 --- a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp @@ -37,6 +37,8 @@ class TeamPolicyInternal int m_league_size; int m_chunk_size; + Kokkos::Serial m_space; + public: //! Tag this class as a kokkos execution policy using execution_policy = TeamPolicyInternal; @@ -46,10 +48,7 @@ class TeamPolicyInternal //! Execution space of this execution policy: using execution_space = Kokkos::Serial; - const typename traits::execution_space& space() const { - static typename traits::execution_space m_space; - return m_space; - } + const typename traits::execution_space& space() const { return m_space; } template friend class TeamPolicyInternal; @@ -116,12 +115,13 @@ class TeamPolicyInternal return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024); } /** \brief Specify league size, request team size */ - TeamPolicyInternal(const execution_space&, int league_size_request, + TeamPolicyInternal(const execution_space& space, int league_size_request, int team_size_request, int /* vector_length_request */ = 1) : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_league_size(league_size_request), - m_chunk_size(32) { + m_chunk_size(32), + m_space(space) { if (team_size_request > 1) Kokkos::abort("Kokkos::abort: Requested Team Size is too large!"); } diff --git a/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp b/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp index 65314d6be7cf..f8b570ab64d1 100644 --- a/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp +++ b/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp @@ -28,6 +28,17 @@ struct SumFunctor { void operator()(int i, int& lsum) const { lsum += i; } }; +template +void check_space_member_for_policies(const ExecSpace& exec) { + Kokkos::RangePolicy range_policy(exec, 0, 1); + ASSERT_EQ(range_policy.space(), exec); + Kokkos::MDRangePolicy> mdrange_policy(exec, {0, 0}, + {1, 1}); + ASSERT_EQ(mdrange_policy.space(), exec); + Kokkos::TeamPolicy team_policy(exec, 1, Kokkos::AUTO); + ASSERT_EQ(team_policy.space(), exec); +} + template void check_distinctive([[maybe_unused]] ExecSpace exec1, [[maybe_unused]] ExecSpace exec2) { @@ -89,6 +100,9 @@ void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { void test_partitioning(std::vector& instances) { check_distinctive(instances[0], instances[1]); + check_space_member_for_policies(instances[0]); + check_space_member_for_policies(instances[1]); + int sum1, sum2; int N = 3910; run_threaded_test( diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp index e1aa851f1020..61b2bfb15052 100644 --- a/packages/kokkos/core/unit_test/TestReduce.hpp +++ b/packages/kokkos/core/unit_test/TestReduce.hpp @@ -625,4 +625,30 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) { } #endif #endif + +#if defined(NDEBUG) +// the following test was made for: +// https://github.com/kokkos/kokkos/issues/6517 + +struct FunctorReductionWithLargeIterationCount { + KOKKOS_FUNCTION void operator()(const int64_t /*i*/, double& update) const { + update += 1.0; + } +}; + +TEST(TEST_CATEGORY, reduction_with_large_iteration_count) { + if constexpr (std::is_same_v) { + GTEST_SKIP() << "Disabling for host backends"; + } + + const int64_t N = pow(2LL, 39LL) - pow(2LL, 8LL) + 1; + Kokkos::RangePolicy> p(0, N); + double nu = 0; + EXPECT_NO_THROW(Kokkos::parallel_reduce( + "sample reduction", p, FunctorReductionWithLargeIterationCount(), nu)); + ASSERT_DOUBLE_EQ(nu, double(N)); +} +#endif + } // namespace Test diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp index 8f7499c244b0..673c0f0fff35 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp @@ -38,8 +38,13 @@ TEST(hip, space_access) { static_assert(!Kokkos::Impl::MemorySpaceAccess::assignable); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::Impl::MemorySpaceAccess::accessible); +#else + static_assert(Kokkos::Impl::MemorySpaceAccess::accessible); +#endif static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::SpaceAccessibility::accessible); +#else + static_assert(Kokkos::SpaceAccessibility::accessible); +#endif static_assert( Kokkos::SpaceAccessibility::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(std::is_same::Space, Kokkos::HostSpace>::value); +#else + static_assert(std::is_same::Space, + Kokkos::Device>::value); +#endif static_assert( std::is_same::Space, diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index bd122a456bdc..31be92532543 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -36,3 +36,4 @@ tag: 4.1.00 date: 06:20:2023 master: 62d2b6c8 release: adde1e6a tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88 tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 +tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e From da44a338eb9c0e47ac8084d7966a9e0dce3a8a1a Mon Sep 17 00:00:00 2001 From: kliegeois Date: Tue, 7 May 2024 09:37:31 -0600 Subject: [PATCH 07/13] Add timers for potentially costly regions of the driver --- packages/ifpack2/example/BlockTriDi.cpp | 35 ++++++++++++++++++------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/packages/ifpack2/example/BlockTriDi.cpp b/packages/ifpack2/example/BlockTriDi.cpp index 0a27ed1bb198..3050666b490d 100644 --- a/packages/ifpack2/example/BlockTriDi.cpp +++ b/packages/ifpack2/example/BlockTriDi.cpp @@ -318,6 +318,8 @@ main (int argc, char* argv[]) RCP