diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index c72e311ad456..d4575ba6457d 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -288,7 +288,7 @@ jobs: --workspace-dir /home/runner/_work/Trilinos \ --source-dir ${GITHUB_WORKSPACE} \ --build-dir /home/Trilinos/build \ - --dashboard-build-name=PR-${{ github.event.pull_request.number }}_${AT2_IMAGE}_release_static_uvm \ + --dashboard-build-name=PR-${{ github.event.pull_request.number }}_${AT2_IMAGE}_release_static \ --ctest-driver /home/runner/_work/Trilinos/Trilinos/cmake/SimpleTesting/cmake/ctest-driver.cmake \ --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ --filename-subprojects ./package_subproject_list.cmake \ @@ -385,7 +385,7 @@ jobs: --ctest-driver /home/runner/_work/Trilinos/Trilinos/cmake/SimpleTesting/cmake/ctest-driver.cmake \ --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ --filename-subprojects ./package_subproject_list.cmake \ - --filename-packageenables ./packageEnables.cmake \ + --skip-create-packageenables \ - name: Summary if: ${{ !cancelled() }} shell: bash -l {0} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 19c03bf2714f..5a5e701def17 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL - uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -108,6 +108,6 @@ jobs: ninja -j 16 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 1cbaf2b3c6e4..88c2a1fcf484 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 + uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 with: sarif_file: results.sarif diff --git a/packages/amesos2/cmake/Amesos2_config.h.in b/packages/amesos2/cmake/Amesos2_config.h.in index fc004c7c8fe4..4a6e76ce8f67 100644 --- a/packages/amesos2/cmake/Amesos2_config.h.in +++ b/packages/amesos2/cmake/Amesos2_config.h.in @@ -110,3 +110,9 @@ /* Define to 1 if SuperLU's *gssvx and *gsisx routines need a GlobalLU_t argument. */ #cmakedefine HAVE_AMESOS2_SUPERLU5_API + +#cmakedefine HAVE_AMESOS2_XPETRA +#cmakedefine HAVE_AMESOS2_ZOLTAN2CORE +#ifdef HAVE_AMESOS2_ZOLTAN2CORE +# define HAVE_AMESOS2_ZOLTAN2 +#endif diff --git a/packages/amesos2/cmake/Dependencies.cmake b/packages/amesos2/cmake/Dependencies.cmake index 879d21293c3f..9c5d753ee8ee 100644 --- a/packages/amesos2/cmake/Dependencies.cmake +++ b/packages/amesos2/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ SET(LIB_REQUIRED_DEP_PACKAGES Teuchos Tpetra TrilinosSS Kokkos) SET(LIB_OPTIONAL_DEP_PACKAGES Epetra EpetraExt ShyLU_NodeBasker ShyLU_NodeTacho) SET(TEST_REQUIRED_DEP_PACKAGES) -SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS) +SET(TEST_OPTIONAL_DEP_PACKAGES ShyLU_NodeBasker ShyLU_NodeTacho Kokkos TrilinosSS Xpetra Zoltan2Core) # SET(LIB_REQUIRED_DEP_TPLS SuperLU) SET(LIB_REQUIRED_DEP_TPLS ) SET(LIB_OPTIONAL_DEP_TPLS MPI SuperLU SuperLUMT SuperLUDist LAPACK UMFPACK PARDISO_MKL CSS_MKL ParMETIS METIS Cholmod MUMPS STRUMPACK CUSPARSE CUSOLVER) diff --git a/packages/amesos2/example/GappedMtxGIDs-1proc.cpp b/packages/amesos2/example/GappedMtxGIDs-1proc.cpp index 4493ee0558b0..49238e16d120 100644 --- a/packages/amesos2/example/GappedMtxGIDs-1proc.cpp +++ b/packages/amesos2/example/GappedMtxGIDs-1proc.cpp @@ -160,8 +160,10 @@ int main(int argc, char *argv[]) { "does not result in the same Map."); } - if ( myRank == 0 && verbose ) { - *fos << "\nrowMap->describe output:" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "\nrowMap->describe output:" << endl; + } rowMap->describe(*fos, Teuchos::VERB_EXTREME); } @@ -186,16 +188,20 @@ int main(int argc, char *argv[]) { A = readCrsMatrixFromFile (mtx_name, fos, rowMap, domainMap, rangeMap, convert_mtx_to_zero_base, num_header_lines); } - if ( myRank == 0 && verbose ) { - *fos << "A->describe" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "A->describe" << endl; + } A->describe(*fos, Teuchos::VERB_EXTREME); } RCP RHS; RHS = Tpetra::MatrixMarket::Reader::readDenseFile (rhs_name, comm, rangeMap); - if ( myRank == 0 && verbose ) { - *fos << "RHS->describe" << endl; + if ( verbose ) { + if ( myRank == 0 ) { + *fos << "RHS->describe" << endl; + } RHS->describe(*fos, Teuchos::VERB_EXTREME); } @@ -410,6 +416,8 @@ readCrsMatrixFromFile (const std::string& matrixFilename, for (typename Teuchos::Array::size_type i=0; iinsertGlobalValues (gblRowInds[i], gblColInds(i,1), vals(i,1)); } + } else { + A = Teuchos::rcp(new MAT(rowMap, 0)); } A->fillComplete (domainMap, rangeMap); diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp index 350d755ec402..347908857fed 100644 --- a/packages/amesos2/example/SimpleSolve_File.cpp +++ b/packages/amesos2/example/SimpleSolve_File.cpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include #include @@ -22,8 +25,16 @@ #include #include -#include "Amesos2.hpp" -#include "Amesos2_Version.hpp" +#include +#include + +#if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_ZOLTAN2) +# include +# include +# include +# include +# include +#endif int main(int argc, char *argv[]) { @@ -32,7 +43,9 @@ int main(int argc, char *argv[]) { typedef Tpetra::CrsMatrix<>::scalar_type Scalar; typedef Tpetra::Map<>::local_ordinal_type LO; typedef Tpetra::Map<>::global_ordinal_type GO; + typedef Tpetra::Map<>::node_type NO; + typedef Tpetra::RowGraph Graph; typedef Tpetra::CrsMatrix MAT; typedef Tpetra::MultiVector MV; @@ -51,24 +64,32 @@ int main(int argc, char *argv[]) { Teuchos::oblackholestream blackhole; - bool printMatrix = false; - bool printSolution = false; - bool checkSolution = false; - bool printTiming = false; - bool allprint = false; + bool printMatrix = false; + bool printSolution = false; + bool checkSolution = false; + bool printTiming = false; + bool useStackedTimer = false; + bool allprint = false; bool verbose = (myRank==0); + bool useZoltan2 = false; + bool useParMETIS = false; std::string mat_filename("arc130.mtx"); std::string rhs_filename(""); std::string solvername("Superlu"); + std::string xml_filename(""); Teuchos::CommandLineProcessor cmdp(false,true); cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); cmdp.setOption("filename",&mat_filename,"Filename for Matrix-Market test matrix."); cmdp.setOption("rhs_filename",&rhs_filename,"Filename for Matrix-Market right-hand-side."); cmdp.setOption("solvername",&solvername,"Name of solver."); + cmdp.setOption("xml_filename",&xml_filename,"XML Filename for Solver parameters."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); cmdp.setOption("check-solution","no-check-solution",&checkSolution,"Check solution vector after solve."); + cmdp.setOption("use-zoltan2","no-zoltan2",&useZoltan2,"Use Zoltan2 (Hypergraph) for repartitioning"); + cmdp.setOption("use-parmetis","no-parmetis",&useParMETIS,"Use ParMETIS for repartitioning"); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); + cmdp.setOption("use-stacked-timer","no-stacked-timer",&useStackedTimer,"Use StackedTimer to print solver timing statistics"); cmdp.setOption("all-print","root-print",&allprint,"All processors print to out"); if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { return -1; @@ -78,27 +99,17 @@ int main(int argc, char *argv[]) { RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); // Say hello - out << myRank << " : " << Amesos2::version() << std::endl << std::endl; + out << myRank << " : " << Amesos2::version() << " on " << comm->getSize() << " MPIs" << std::endl << std::endl; const size_t numVectors = 1; - RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(mat_filename, comm); - if( printMatrix ){ - A->describe(*fos, Teuchos::VERB_EXTREME); - } - else if( verbose ){ - std::cout << std::endl << A->description() << std::endl << std::endl; - } + // Read matrix + RCP A = Tpetra::MatrixMarket::Reader::readSparseFile(mat_filename, comm); - // get the maps - RCP > dmnmap = A->getDomainMap(); + // get the map (Range Map used for both X & B) RCP > rngmap = A->getRangeMap(); - - GO nrows = dmnmap->getGlobalNumElements(); - RCP > root_map - = rcp( new Map(nrows,myRank == 0 ? nrows : 0,0,comm) ); - RCP Xhat = rcp( new MV(root_map,numVectors) ); - RCP > importer = rcp( new Import(dmnmap,root_map) ); + RCP > dmnmap = A->getDomainMap(); + GO nrows = A->getGlobalNumRows(); // Create random X RCP X = rcp(new MV(dmnmap,numVectors)); @@ -122,6 +133,69 @@ int main(int argc, char *argv[]) { B = Tpetra::MatrixMarket::Reader::readDenseFile (rhs_filename, comm, rngmap); } + if (useZoltan2 || useParMETIS) { +#if defined(HAVE_AMESOS2_XPETRA) && defined(HAVE_AMESOS2_ZOLTAN2) + // Specify partitioning parameters + Teuchos::ParameterList zoltan_params; + zoltan_params.set("partitioning_approach", "partition"); + // + if (useParMETIS) { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(ParMETIS)" << std::endl; + } + zoltan_params.set("algorithm", "parmetis"); + zoltan_params.set("symmetrize_input", "transpose"); + zoltan_params.set("partitioning_objective", "minimize_cut_edge_weight"); + } else { + if (comm->getRank() == 0) { + std::cout << "Using Zoltan2(HyperGraph)" << std::endl; + } + zoltan_params.set("algorithm", "phg"); + } + + // Create an input adapter for the Tpetra matrix. + Zoltan2::TpetraRowGraphAdapter + zoltan_graph(A->getGraph()); + + // Create and solve partitioning problem + Zoltan2::PartitioningProblem> + problem(&zoltan_graph, &zoltan_params); + problem.solve(); + + // Redistribute matrix + RCP zoltan_A; + Zoltan2::TpetraCrsMatrixAdapter zoltan_matrix(A); + zoltan_matrix.applyPartitioningSolution (*A, zoltan_A, problem.getSolution()); + // Set it as coefficient matrix, and update range map + A = zoltan_A; + rngmap = A->getRangeMap(); + + // Redistribute RHS + RCP zoltan_b; + Zoltan2::XpetraMultiVectorAdapter adapterRHS(rcpFromRef (*B)); + adapterRHS.applyPartitioningSolution (*B, zoltan_b, problem.getSolution()); + // Set it as RHS + B = zoltan_b; + + // Redistribute Sol + RCP zoltan_x; + Zoltan2::XpetraMultiVectorAdapter adapterSol(rcpFromRef (*X)); + adapterSol.applyPartitioningSolution (*X, zoltan_x, problem.getSolution()); + // Set it as Sol + X = zoltan_x; +#else + TEUCHOS_TEST_FOR_EXCEPTION( + useZoltan2, std::invalid_argument, + "Both Xpetra and Zoltan2 are needed to use Zoltan2."); +#endif + } + if( printMatrix ){ + A->describe(*fos, Teuchos::VERB_EXTREME); + } + else if( verbose ){ + std::cout << std::endl << A->description() << std::endl << std::endl; + } + // Constructor from Factory RCP > solver; if( !Amesos2::query(solvername) ){ @@ -130,11 +204,30 @@ int main(int argc, char *argv[]) { } solver = Amesos2::create(solvername, A, X, B); + if (xml_filename != "") { + Teuchos::ParameterList test_params = + Teuchos::ParameterXMLFileReader(xml_filename).getParameters(); + Teuchos::ParameterList& amesos2_params = test_params.sublist("Amesos2"); + *fos << amesos2_params.currentParametersString() << std::endl; + solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); + } + RCP stackedTimer; + if(useStackedTimer) { + stackedTimer = rcp(new Teuchos::StackedTimer("Amesos2 SimpleSolve-File")); + Teuchos::TimeMonitor::setStackedTimer(stackedTimer); + } solver->symbolicFactorization().numericFactorization().solve(); + if(useStackedTimer) { + stackedTimer->stopBaseTimer(); + } if( printSolution ){ // Print the solution + RCP > root_map + = rcp( new Map(nrows,myRank == 0 ? nrows : 0,0,comm) ); + RCP Xhat = rcp( new MV(root_map,numVectors) ); + RCP > importer = rcp( new Import(rngmap,root_map) ); if( allprint ){ if( myRank == 0 ) *fos << "Solution :" << std::endl; Xhat->describe(*fos,Teuchos::VERB_EXTREME); @@ -167,11 +260,20 @@ int main(int argc, char *argv[]) { if (myRank == 0) *fos << std::endl; } - if( printTiming ){ + if(useStackedTimer) { + Teuchos::StackedTimer::OutputOptions options; + options.num_histogram=3; + options.print_warnings = false; + options.output_histogram = true; + options.output_fraction=true; + options.output_minmax = true; + stackedTimer->report(std::cout, comm, options); + } else if( printTiming ){ // Print some timing statistics solver->printTiming(*fos); + } else { + Teuchos::TimeMonitor::summarize(); } - Teuchos::TimeMonitor::summarize(); // We are done. return 0; diff --git a/packages/amesos2/example/quick_solve_epetra.cpp b/packages/amesos2/example/quick_solve_epetra.cpp index 6367330d1106..dfb3c4301b79 100644 --- a/packages/amesos2/example/quick_solve_epetra.cpp +++ b/packages/amesos2/example/quick_solve_epetra.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -38,18 +39,24 @@ #include "Amesos2.hpp" #include "Amesos2_Version.hpp" +#include "Amesos2_Util.hpp" int main(int argc, char *argv[]) { Teuchos::GlobalMPISession mpiSession(&argc,&argv); typedef Epetra_CrsMatrix MAT; typedef Epetra_MultiVector MV; + typedef Tpetra::CrsMatrix<> TpetraMAT; + typedef Tpetra::Map<>::local_ordinal_type LO; + typedef Tpetra::Map<>::global_ordinal_type GO; + typedef Tpetra::Map<>::node_type NO; using Tpetra::global_size_t; using Teuchos::tuple; using Teuchos::RCP; using Teuchos::rcp; + #ifdef HAVE_MPI const Epetra_MpiComm comm (MPI_COMM_WORLD); #else @@ -68,15 +75,21 @@ int main(int argc, char *argv[]) { std::string solver_name = "SuperLU"; std::string filedir = "../test/matrices/"; std::string filename = "arc130.mtx"; + std::string map_filename = ""; + bool make_contiguous = false; Teuchos::CommandLineProcessor cmdp(false,true); cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); cmdp.setOption("filedir",&filedir,"Directory where matrix-market files are located"); cmdp.setOption("filename",&filename,"Filename for Matrix-Market test matrix."); + cmdp.setOption("map_filename",&map_filename,"Filename for rowMap of test matrix."); cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it."); cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve."); cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics"); cmdp.setOption("solver", &solver_name, "Which TPL solver library to use."); + cmdp.setOption("makeContiguous","isContiguous",&make_contiguous, "Set this option to makeContiguous if matrix has gapped row ids"); + if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { + std::cerr << solver_name << " failed to process command-line args. Exiting..." << std::endl; return -1; } @@ -92,10 +105,20 @@ int main(int argc, char *argv[]) { std::string mat_pathname = filedir + filename; MAT* A; - int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), comm, A, false, false); - if( ret == -1 ){ - *fos << "error reading matrix file from disk, aborting..." << std::endl; - return EXIT_FAILURE; + if (map_filename != "") { + auto rowTMap = Tpetra::MatrixMarket::Reader< TpetraMAT >::readMapFile(map_filename, Tpetra::getDefaultComm()); + auto rowEMap = Amesos2::Util::tpetra_map_to_epetra_map(*(rowTMap.getRawPtr())); + int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), *rowEMap, A, false, verbose); + if( ret == -1 ){ + *fos << "error reading matrix file (" << mat_pathname << ") with map (" << map_filename << ") from disk, aborting..." << std::endl; + return EXIT_FAILURE; + } + } else { + int ret = EpetraExt::MatrixMarketFileToCrsMatrix(mat_pathname.c_str(), comm, A, false, verbose); + if( ret == -1 ){ + *fos << "error reading matrix file from disk, aborting..." << std::endl; + return EXIT_FAILURE; + } } if( printMatrix ){ @@ -122,16 +145,29 @@ int main(int argc, char *argv[]) { return 0; } + Teuchos::ParameterList amesos2_params("Amesos2"); + if ( make_contiguous ) { + if( myRank == 0 ) { *fos << " set IsContigous==false in solver parameter list" << std::endl; } + amesos2_params.sublist(solver->name()).set("IsContiguous", false, "Are GIDs Contiguous"); + } #ifdef HAVE_AMESOS2_SHYLU_NODEBASKER - if( Amesos2::query("shylubasker") ) { - Teuchos::ParameterList amesos2_params("Amesos2"); + if( Amesos2::query("shylubasker") && solver->name() == "ShyLUBasker") { amesos2_params.sublist(solver_name).set("num_threads", 1, "Number of threads"); - solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); } #endif + solver->setParameters( Teuchos::rcpFromRef(amesos2_params) ); solver->solve(); + { + double nrmR, nrmB; + RCP R = rcp(new MV(rngmap,numVectors)); + A->Apply(*X, *R); + R->Update(1.0, *B, -1.0); + R->Norm2(&nrmR); + B->Norm2(&nrmB); + if( myRank == 0 ) { *fos << std::endl << nrmR << " / " << nrmB << " = " << nrmR/nrmB << std::endl << std::endl; } + } if( printSolution ){ // Print the solution X->Print(*(fos->getOStream())); diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp index de968c111e7d..4b91a84e892a 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -76,7 +76,7 @@ namespace Amesos2 { // This may be PMKL::_INTEGER_t or long long int depending on the // mapping and input ordinal - typedef typename TypeMap::type int_t; + typedef typename TypeMap::type int_t; /* For CssMKL we dispatch based on the integer type instead of * the scalar type: @@ -271,13 +271,13 @@ namespace Amesos2 { bool css_initialized_; bool is_contiguous_; + /// The messaging level. Set to 1 if you wish for Pardiso MKL to print statistical info + int_t msglvl_; + /// CssMKL parameter vector. Note that the documentation uses /// 1-based indexing, but our interface must use 0-based indexing int_t iparm_[64]; - /// The messaging level. Set to 1 if you wish for Pardiso MKL to print statistical info - static const int_t msglvl_; - // We will deal with 1 factor at a time static const int_t maxfct_; static const int_t mnum_; @@ -289,6 +289,8 @@ namespace Amesos2 { MPI_Fint CssComm_; Teuchos::RCP css_rowmap_; + Teuchos::RCP css_contig_rowmap_; + Teuchos::RCP css_contig_colmap_; }; // End class CssMKL diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp index ea043e663c2b..b0191f7c7921 100644 --- a/packages/amesos2/src/Amesos2_CssMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -46,23 +46,32 @@ namespace Amesos2 { , nrhs_(0) , css_initialized_(false) , is_contiguous_(true) + , msglvl_(0) { + // Matrix info + Teuchos::RCP > matComm = this->matrixA_->getComm (); + const global_ordinal_type indexBase = this->matrixA_->getRowMap ()->getIndexBase (); + const local_ordinal_type nrows = this->matrixA_->getLocalNumRows(); + + // rowmap for loadA (to have locally contiguous) + css_rowmap_ = + Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); + css_contig_rowmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, matComm)); + css_contig_colmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, matComm)); + // set the default matrix type set_css_mkl_matrix_type(); set_css_mkl_default_parameters(pt_, iparm_); // index base - const global_ordinal_type indexBase = this->matrixA_->getRowMap ()->getIndexBase (); iparm_[34] = (indexBase == 0 ? 1 : 0); /* Use one or zero-based indexing */ - // 1D block-row distribution - auto frow = this->matrixA_->getRowMap()->getMinGlobalIndex(); - auto nrows = this->matrixA_->getLocalNumRows(); + // 1D block-row distribution (using Contiguous map) + auto frow = css_rowmap_->getMinGlobalIndex(); iparm_[39] = 2; /* Matrix input format. */ iparm_[40] = frow; /* > Beginning of input domain. */ iparm_[41] = frow+nrows-1; /* > End of input domain. */ // get MPI Comm - Teuchos::RCP > matComm = this->matrixA_->getComm (); TEUCHOS_TEST_FOR_EXCEPTION( matComm.is_null (), std::logic_error, "Amesos2::CssMKL " "constructor: The matrix's communicator is null!"); @@ -81,10 +90,6 @@ namespace Amesos2 { "MPI_COMM_NULL."); MPI_Comm CssComm = *(matMpiComm->getRawMpiComm ()); CssComm_ = MPI_Comm_c2f(CssComm); - - // rowmap for loadA (to have locally contiguous) - css_rowmap_ = - Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); } @@ -125,6 +130,10 @@ namespace Amesos2 { int CssMKL::symbolicFactorization_impl() { + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::symbolicFactorization:\n" << std::endl; + for (int i=0; i < 64; i++) std::cout << " * IPARM[" << i << "] = " << iparm_[i] << std::endl; + } int_t error = 0; { #ifdef HAVE_AMESOS2_TIMERS @@ -141,13 +150,16 @@ namespace Amesos2 { const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); } check_css_mkl_error(Amesos2::SYMBFACT, error); + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::symbolicFactorization done:" << std::endl; + std::cout << " * Time : " << this->timers_.symFactTime_.totalElapsedTime() << std::endl; + } // Pardiso only lets you retrieve the total number of factor // non-zeros, not for each individually. We should document how // such a situation is reported. this->setNnzLU(iparm_[17]); css_initialized_ = true; - return(0); } @@ -156,6 +168,9 @@ namespace Amesos2 { int CssMKL::numericFactorization_impl() { + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::numericFactorization:\n" << std::endl; + } int_t error = 0; { #ifdef HAVE_AMESOS2_TIMERS @@ -173,6 +188,10 @@ namespace Amesos2 { const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); } check_css_mkl_error(Amesos2::NUMFACT, error); + if (msglvl_ > 0 && this->matrixA_->getComm()->getRank() == 0) { + std::cout << " CssMKL::numericFactorization done:" << std::endl; + std::cout << " Time : " << this->timers_.numFactTime_.totalElapsedTime() << std::endl; + } return( 0 ); } @@ -202,8 +221,7 @@ namespace Amesos2 { MultiVecAdapter, solver_scalar_type>::do_get(B, bvals_(), as(ld_rhs), - DISTRIBUTED_NO_OVERLAP, - this->rowIndexBase_); + Teuchos::ptrInArg(*css_rowmap_)); } int_t error = 0; @@ -242,7 +260,7 @@ namespace Amesos2 { MultiVecAdapter, solver_scalar_type>::do_put(X, xvals_(), as(ld_rhs), - DISTRIBUTED_NO_OVERLAP); + Teuchos::ptrInArg(*css_rowmap_)); } return( 0 ); @@ -268,7 +286,7 @@ namespace Amesos2 { RCP valid_params = getValidParameters_impl(); - // Fill-in reordering: 0 = minimum degree, 2 = METIS 4.0.1 (default), 3 = METIS 5.1, 4 = AMD, + // 2: Fill-in reordering from METIS, 3: thread dissection, 10: MPI version of the nested dissection if( parameterList->isParameter("IPARM(2)") ) { RCP fillin_validator = valid_params->getEntry("IPARM(2)").validator(); @@ -318,10 +336,22 @@ namespace Amesos2 { parameterList->getEntry("IPARM(18)").setValidator(report_validator); iparm_[17] = getIntegralValue(*parameterList, "IPARM(18)"); } + + // Check input matrix is sorted + if( parameterList->isParameter("IPARM(28)") ) + { + RCP report_validator = valid_params->getEntry("IPARM(28)").validator(); + parameterList->getEntry("IPARM(28)").setValidator(report_validator); + iparm_[27] = getIntegralValue(*parameterList, "IPARM(28)"); + } if( parameterList->isParameter("IsContiguous") ){ is_contiguous_ = parameterList->get("IsContiguous"); } + + if( parameterList->isParameter("verbose") ){ + msglvl_ = parameterList->get("verbose"); + } } @@ -406,8 +436,13 @@ CssMKL::getValidParameters_impl() const pl->set("IPARM(18)", as(iparm_temp[17]), "Report the number of non-zero elements in the factors", anyNumberParameterEntryValidator(preferred_int, accept_int)); + pl->set("IPARM(28)", as(iparm_temp[27]), "Check input matrix is sorted", + anyNumberParameterEntryValidator(preferred_int, accept_int)); + pl->set("IsContiguous", true, "Whether GIDs contiguous"); + pl->set("verbose", 0, "Verbosity Message Level"); + valid_params = pl; } @@ -427,42 +462,82 @@ CssMKL::loadA_impl(EPhase current_phase) // CssMKL does not need matrix data in the pre-ordering phase if( current_phase == PREORDERING ) return( false ); + // is_contiguous : input is contiguous + // CONTIGUOUS_AND_ROOTED : input is not contiguous, so make output contiguous EDistribution dist_option = (iparm_[39] != 0 ? DISTRIBUTED_NO_OVERLAP : ((is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED)); - if (current_phase == SYMBFACT) { - if (dist_option == DISTRIBUTED_NO_OVERLAP) { - Kokkos::resize(nzvals_temp_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(nzvals_view_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(colind_view_, this->matrixA_->getLocalNNZ()); - Kokkos::resize(rowptr_view_, this->matrixA_->getLocalNumRows() + 1); - } else { - if( this->root_ ) { - Kokkos::resize(nzvals_temp_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(nzvals_view_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(colind_view_, this->matrixA_->getGlobalNNZ()); - Kokkos::resize(rowptr_view_, this->matrixA_->getGlobalNumRows() + 1); + if (dist_option == DISTRIBUTED_NO_OVERLAP && !is_contiguous_) { + // Neeed to form contiguous matrix + #if 1 + // Only reinex GIDs + css_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve + Teuchos::RCP > contig_mat = this->matrixA_->reindex(css_contig_rowmap_, css_contig_colmap_); + #else + // Redistribued matrixA into contiguous GIDs + Teuchos::RCP > contig_mat = this->matrixA_->get(ptrInArg(*css_rowmap_)); + //css_rowmap_ = contig_mat->getRowMap(); // use new map to redistribute vectors in solve + #endif + // Copy into local views + if (current_phase == SYMBFACT) { + Kokkos::resize(nzvals_temp_, contig_mat->getLocalNNZ()); + Kokkos::resize(nzvals_view_, contig_mat->getLocalNNZ()); + Kokkos::resize(colind_view_, contig_mat->getLocalNNZ()); + Kokkos::resize(rowptr_view_, contig_mat->getLocalNumRows() + 1); + } + int_t nnz_ret = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); +#endif + Util::get_crs_helper_kokkos_view, + host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( + contig_mat.ptr(), + nzvals_temp_, colind_view_, rowptr_view_, + nnz_ret, + ptrInArg(*(contig_mat->getRowMap())), + #if 1 + DISTRIBUTED_NO_OVERLAP, + #else + ROOTED, + #endif + SORTED_INDICES); + Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + } + } else { + if (current_phase == SYMBFACT) { + if (dist_option == DISTRIBUTED_NO_OVERLAP) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getLocalNumRows() + 1); } else { - Kokkos::resize(nzvals_temp_, 0); - Kokkos::resize(nzvals_view_, 0); - Kokkos::resize(colind_view_, 0); - Kokkos::resize(rowptr_view_, 0); + if( this->root_ ) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getGlobalNumRows() + 1); + } else { + Kokkos::resize(nzvals_temp_, 0); + Kokkos::resize(nzvals_view_, 0); + Kokkos::resize(colind_view_, 0); + Kokkos::resize(rowptr_view_, 0); + } } } - } - - { + int_t nnz_ret = 0; + { #ifdef HAVE_AMESOS2_TIMERS - Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); #endif - int_t nnz_ret = 0; - Util::get_crs_helper_kokkos_view, - host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( - this->matrixA_.ptr(), - nzvals_temp_, colind_view_, rowptr_view_, - nnz_ret, - Teuchos::ptrInArg(*css_rowmap_), - dist_option, - SORTED_INDICES); - Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + Util::get_crs_helper_kokkos_view, + host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( + this->matrixA_.ptr(), + nzvals_temp_, colind_view_, rowptr_view_, + nnz_ret, + ptrInArg(*(this->matrixA_->getRowMap())), + dist_option, + SORTED_INDICES); + Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + } } return( true ); } @@ -564,14 +639,22 @@ CssMKL::set_css_mkl_default_parameters(void* pt[], int_t iparm[]) // Reset some of the default parameters iparm[1] = 10; /* 2: Fill-in reordering from METIS, 3: thread dissection, 10: MPI version of the nested dissection and symbolic factorization*/ iparm[7] = 0; /* Max numbers of iterative refinement steps */ - iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ iparm[10] = 0; /* Disable nonsymmetric permutation and scaling MPS */ iparm[11] = 0; /* Normal solve (0), or a transpose solve (1) */ iparm[12] = 0; /* Do not use (non-)symmetric matchings */ iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ - iparm[20] = -1; /* Pivoting for symmetric indefinite matrices */ + iparm[20] = 1; /* Pivoting for symmetric indefinite matrices */ iparm[26] = 1; /* Check input matrix is sorted */ + // diagonal pertubation + if (mtype_ == -2 || mtype_ == -4) { + // symmetric indefinite + iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */ + } else { + // non-symmetric + iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ + } + // set single or double precision if constexpr ( std::is_same_v ) { iparm[27] = 1; // single-precision @@ -581,12 +664,9 @@ CssMKL::set_css_mkl_default_parameters(void* pt[], int_t iparm[]) iparm[34] = 1; /* Use zero-based indexing */ } -template -const char* CssMKL::name = "CSSMKL"; template -const typename CssMKL::int_t -CssMKL::msglvl_ = 0; // set to be one, for more CSS messages +const char* CssMKL::name = "CSSMKL"; template const typename CssMKL::int_t diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp index 0f376c01bb41..c75d3c42c035 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -24,6 +24,9 @@ #include "Amesos2_config.h" #include +#ifdef HAVE_AMESOS2_EPETRAEXT +#include +#endif #include "Amesos2_EpetraRowMatrix_AbstractMatrixAdapter_decl.hpp" #include "Amesos2_MatrixAdapter_decl.hpp" @@ -69,7 +72,19 @@ namespace Amesos2 { ConcreteMatrixAdapter(RCP m); RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; - + RCP > reindex_impl(Teuchos::RCP> &contigRowMap, + Teuchos::RCP> &contigColMap) const; + + //! Print a description of this adapter to the given output stream + void + describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; +#ifdef HAVE_AMESOS2_EPETRAEXT + private: + mutable RCP StdIndex_; + mutable RCP ContigMat_; +#endif }; } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp index eb197a4222a6..7dc7cd4babe5 100644 --- a/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_EpetraCrsMatrix_MatrixAdapter_def.hpp @@ -47,6 +47,43 @@ namespace Amesos2 { return( rcp(new ConcreteMatrixAdapter(t_mat)) ); } + Teuchos::RCP > + ConcreteMatrixAdapter::reindex_impl(Teuchos::RCP > &contigRowMap, + Teuchos::RCP > &contigColMap) const + { + #if defined(HAVE_AMESOS2_EPETRAEXT) + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcpFromRef; + auto CrsMatrix = const_cast(this->mat_.getRawPtr()); + if(!CrsMatrix) { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_EpetraCrsMatrix_MatrixAdapter requires CsrMatrix to reindex matrices."); + } + + // Map + RCP OriginalMap = rcpFromRef(CrsMatrix->RowMap()); + int NumGlobalElements = OriginalMap->NumGlobalElements(); + int NumMyElements = OriginalMap->NumMyElements(); + auto ReindexMap = rcp( new Epetra_Map( NumGlobalElements, NumMyElements, 0, OriginalMap->Comm() ) ); + + // Matrix + StdIndex_ = rcp( new EpetraExt::CrsMatrix_Reindex( *ReindexMap ) ); + ContigMat_ = rcpFromRef((*StdIndex_)( *CrsMatrix )); + if(!ContigMat_) { + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_EpetraCrsMatrix_MatrixAdapter reindexing failed."); + } + return rcp(new ConcreteMatrixAdapter(ContigMat_)); + #else + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "ConcreteMatrixAdapter requires EpetraExt to reindex matrices."); + #endif + } + + void + ConcreteMatrixAdapter::describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel) const + { + this->mat_->Print(*(os.getOStream())); + } } // end namespace Amesos2 #endif // AMESOS2_EPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp index 723837b8c9a4..812e6ac7321e 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_decl.hpp @@ -52,6 +52,7 @@ namespace Amesos2 { typedef Matrix matrix_t; typedef MatrixAdapter type; typedef ConcreteMatrixAdapter adapter_t; + typedef Tpetra::Map map_t; typedef typename MatrixTraits::global_host_idx_type global_host_idx_t; typedef typename MatrixTraits::global_host_val_type global_host_val_t; @@ -100,7 +101,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EStorage_Ordering ordering=ARBITRARY, EDistribution distribution=ROOTED) const; // This was placed as last argument to preserve API @@ -151,7 +152,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EStorage_Ordering ordering=ARBITRARY, EDistribution distribution=ROOTED) const; // This was placed as last argument to preserve API @@ -199,22 +200,26 @@ namespace Amesos2 { /// Get the local number of non-zeros on this processor size_t getLocalNNZ() const; - Teuchos::RCP > + Teuchos::RCP getMap() const { return static_cast(this)->getMap_impl(); } - Teuchos::RCP > + Teuchos::RCP getRowMap() const { return row_map_; } - Teuchos::RCP > + Teuchos::RCP getColMap() const { return col_map_; } - Teuchos::RCP get(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + Teuchos::RCP get(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; + + /// Reindex the GIDs such that they are contiguous without gaps (0, .., n-1) + /// This is called in loadA for the matrix with (DISTRIBUTED_NO_OVERLAP && !is_contiguous_) + Teuchos::RCP reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; /// Returns a short description of this Solver std::string description() const; diff --git a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp index 7cf8a28c0b97..14fa5dd3305a 100644 --- a/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_MatrixAdapter_def.hpp @@ -41,7 +41,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EStorage_Ordering ordering, EDistribution distribution) const { @@ -60,7 +60,7 @@ namespace Amesos2 { EDistribution distribution, EStorage_Ordering ordering) const { - const Teuchos::RCP > rowmap + const Teuchos::RCP rowmap = Util::getDistributionMap(distribution, this->getGlobalNumRows(), this->getComm()); @@ -74,7 +74,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EStorage_Ordering ordering, EDistribution distribution) const { @@ -93,7 +93,7 @@ namespace Amesos2 { EDistribution distribution, EStorage_Ordering ordering) const { - const Teuchos::RCP > colmap + const Teuchos::RCP colmap = Util::getDistributionMap(distribution, this->getGlobalNumCols(), this->getComm()); @@ -176,7 +176,10 @@ namespace Amesos2 { void MatrixAdapter::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const - {} + { + // (implemented for Epetra::CrsMatrix & Tpetra::CrsMatrix) + return static_cast(this)->describe(out, verbLevel); + } template < class Matrix > template < class KV > @@ -209,7 +212,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EDistribution distribution, EStorage_Ordering ordering, no_special_impl nsi) const @@ -229,7 +232,7 @@ namespace Amesos2 { KV_GO & colind, KV_GS & rowptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > rowmap, + const Teuchos::Ptr rowmap, EDistribution distribution, EStorage_Ordering ordering, row_access ra) const @@ -270,7 +273,7 @@ namespace Amesos2 { // TODO: There may be some more checking between the row map // compatibility, but things are working fine now. - RCP > rmap = get_mat->getRowMap(); + RCP rmap = get_mat->getRowMap(); ArrayView node_elements = rmap->getLocalElementList(); //if( node_elements.size() == 0 ) return; // no more contribution typename ArrayView::iterator row_it, row_end; @@ -384,7 +387,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EDistribution distribution, EStorage_Ordering ordering, no_special_impl nsi) const @@ -404,7 +407,7 @@ namespace Amesos2 { KV_GO & rowind, KV_GS & colptr, typename MatrixAdapter::global_size_t& nnz, - const Teuchos::Ptr > colmap, + const Teuchos::Ptr colmap, EDistribution distribution, EStorage_Ordering ordering, row_access ra) const @@ -505,12 +508,20 @@ namespace Amesos2 { template < class Matrix > Teuchos::RCP > - MatrixAdapter::get(const Teuchos::Ptr > map, EDistribution distribution) const + MatrixAdapter::get(const Teuchos::Ptr map, EDistribution distribution) const { return static_cast(this)->get_impl(map, distribution); } + template < class Matrix > + Teuchos::RCP > + MatrixAdapter::reindex(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const + { + return static_cast(this)->reindex_impl(contigRowMap, contigColMap); + } + + template Teuchos::RCP > createMatrixAdapter(Teuchos::RCP m){ diff --git a/packages/amesos2/src/Amesos2_SolverCore_def.hpp b/packages/amesos2/src/Amesos2_SolverCore_def.hpp index 6907cc93a708..cd214b210aae 100644 --- a/packages/amesos2/src/Amesos2_SolverCore_def.hpp +++ b/packages/amesos2/src/Amesos2_SolverCore_def.hpp @@ -92,19 +92,23 @@ SolverCore::symbolicFactorization() Teuchos::TimeMonitor LocalTimer1(timers_.totalTime_); #endif - if( !status_.preOrderingDone() ){ - preOrdering(); - if( !matrix_loaded_ ) loadA(SYMBFACT); - } else { - loadA(SYMBFACT); - } + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreSymFactTime_); +#endif + if( !status_.preOrderingDone() ){ + preOrdering(); + if( !matrix_loaded_ ) loadA(SYMBFACT); + } else { + loadA(SYMBFACT); + } - int error_code = static_cast(this)->symbolicFactorization_impl(); - if (error_code == EXIT_SUCCESS){ - ++status_.numSymbolicFact_; - status_.last_phase_ = SYMBFACT; + int error_code = static_cast(this)->symbolicFactorization_impl(); + if (error_code == EXIT_SUCCESS){ + ++status_.numSymbolicFact_; + status_.last_phase_ = SYMBFACT; + } } - return *this; } @@ -116,18 +120,22 @@ SolverCore::numericFactorization() #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor LocalTimer1(timers_.totalTime_); #endif + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreNumFactTime_); +#endif + if( !status_.symbolicFactorizationDone() ){ + symbolicFactorization(); + if( !matrix_loaded_ ) loadA(NUMFACT); + } else { + loadA(NUMFACT); + } - if( !status_.symbolicFactorizationDone() ){ - symbolicFactorization(); - if( !matrix_loaded_ ) loadA(NUMFACT); - } else { - loadA(NUMFACT); - } - - int error_code = static_cast(this)->numericFactorization_impl(); - if (error_code == EXIT_SUCCESS){ - ++status_.numNumericFact_; - status_.last_phase_ = NUMFACT; + int error_code = static_cast(this)->numericFactorization_impl(); + if (error_code == EXIT_SUCCESS){ + ++status_.numNumericFact_; + status_.last_phase_ = NUMFACT; + } } return *this; @@ -189,10 +197,15 @@ SolverCore::solve(const Teuchos::Ptr X, const_cast(*this).numericFactorization(); } - int error_code = static_cast(this)->solve_impl(Teuchos::outArg(*x), Teuchos::ptrInArg(*b)); - if (error_code == EXIT_SUCCESS){ - ++status_.numSolve_; - status_.last_phase_ = SOLVE; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor LocalTimer2(timers_.coreSolveTime_); +#endif + int error_code = static_cast(this)->solve_impl(Teuchos::outArg(*x), Teuchos::ptrInArg(*b)); + if (error_code == EXIT_SUCCESS){ + ++status_.numSolve_; + status_.last_phase_ = SOLVE; + } } } diff --git a/packages/amesos2/src/Amesos2_Tacho_decl.hpp b/packages/amesos2/src/Amesos2_Tacho_decl.hpp index 95c71b184dc6..07acdaa91e49 100644 --- a/packages/amesos2/src/Amesos2_Tacho_decl.hpp +++ b/packages/amesos2/src/Amesos2_Tacho_decl.hpp @@ -196,6 +196,8 @@ class TachoSolver : public SolverCore int small_problem_threshold_size; int streams; bool verbose; + int dofs_per_node; + bool pivot_pert; // int num_kokkos_threads; // int max_num_superblocks; } data_; diff --git a/packages/amesos2/src/Amesos2_Tacho_def.hpp b/packages/amesos2/src/Amesos2_Tacho_def.hpp index 221e505dbc54..e4f1bd98566b 100644 --- a/packages/amesos2/src/Amesos2_Tacho_def.hpp +++ b/packages/amesos2/src/Amesos2_Tacho_def.hpp @@ -27,10 +27,12 @@ TachoSolver::TachoSolver( Teuchos::RCP B ) : SolverCore(A, X, B) { - data_.method = 1; // Cholesky - data_.variant = 2; // solver variant - data_.streams = 1; // # of streams - data_.verbose = false; // verbose + data_.method = 1; // Cholesky + data_.variant = 2; // solver variant + data_.streams = 1; // # of streams + data_.dofs_per_node = 1; // DoFs / node + data_.pivot_pert = false; // Diagonal pertubation + data_.verbose = false; // verbose } @@ -82,7 +84,11 @@ TachoSolver::symbolicFactorization_impl() // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks); // Symbolic factorization currently must be done on host - data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_); + if (data_.dofs_per_node > 1) { + data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_); + } else { + data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_); + } data_.solver.initialize(); } return status; @@ -102,6 +108,11 @@ TachoSolver::numericFactorization_impl() if(do_optimization()) { this->matrixA_->returnValues_kokkos_view(device_nzvals_view_); } + if (data_.pivot_pert) { + data_.solver.useDefaultPivotTolerance(); + } else { + data_.solver.useNoPivotTolerance(); + } data_.solver.factorize(device_nzvals_view_); } return status; @@ -223,6 +234,10 @@ TachoSolver::setParameters_impl(const Teuchos::RCPget ("verbose", false); // # of streams data_.streams = parameterList->get ("num-streams", 1); + // DoFs / node + data_.dofs_per_node = parameterList->get ("dofs-per-node", 1); + // Perturb tiny pivots + data_.pivot_pert = parameterList->get ("perturb-pivot", false); // TODO: Confirm param options // data_.num_kokkos_threads = parameterList->get("kokkos-threads", 1); // data_.max_num_superblocks = parameterList->get("max-num-superblocks", 4); @@ -243,6 +258,8 @@ TachoSolver::getValidParameters_impl() const pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK."); pl->set("verbose", false, "Verbosity"); pl->set("num-streams", 1, "Number of GPU streams"); + pl->set("dofs-per-node", 1, "DoFs per node"); + pl->set("perturb-pivot", false, "Perturb tiny pivots"); // TODO: Confirm param options // pl->set("kokkos-threads", 1, "Number of threads"); diff --git a/packages/amesos2/src/Amesos2_Timers.hpp b/packages/amesos2/src/Amesos2_Timers.hpp index 1f53727d3154..f4468e8b09ba 100644 --- a/packages/amesos2/src/Amesos2_Timers.hpp +++ b/packages/amesos2/src/Amesos2_Timers.hpp @@ -36,6 +36,9 @@ struct Timers { , symFactTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for symbolic factorization"))) , numFactTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for numeric factorization"))) , solveTime_(*(Teuchos::TimeMonitor::getNewTimer("Time for solve"))) + , coreSymFactTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::symbolicFactorization"))) + , coreNumFactTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::numericFactorization"))) + , coreSolveTime_(*(Teuchos::TimeMonitor::getNewTimer("SolverCore::solve"))) , totalTime_(*(Teuchos::TimeMonitor::getNewTimer("Total Time in Amesos2 interface"))) {} @@ -47,6 +50,9 @@ struct Timers { Teuchos::Time symFactTime_; Teuchos::Time numFactTime_; Teuchos::Time solveTime_; + Teuchos::Time coreSymFactTime_; + Teuchos::Time coreNumFactTime_; + Teuchos::Time coreSolveTime_; Teuchos::Time totalTime_; }; diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp index 1ed32235e915..9718c89e1025 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_decl.hpp @@ -83,11 +83,18 @@ namespace Amesos2 { typedef typename super_t::global_size_t global_size_t; typedef ConcreteMatrixAdapter type; + typedef Tpetra::Map map_t; ConcreteMatrixAdapter(RCP m); - RCP > get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + RCP > get_impl(const Teuchos::Ptr map, EDistribution distribution = ROOTED) const; + RCP > reindex_impl(Teuchos::RCP &contigRowMap, Teuchos::RCP &contigColMap) const; + //! Print a description of this adapter to the given output stream + void + describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel = + Teuchos::Describable::verbLevel_default) const; }; } // end namespace Amesos2 diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp index 4d6ea994dcc0..9bf58d9d82df 100644 --- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp @@ -44,7 +44,7 @@ namespace Amesos2 { Teuchos::RCP > > ConcreteMatrixAdapter< Tpetra::CrsMatrix - >::get_impl(const Teuchos::Ptr > map, EDistribution distribution) const + >::get_impl(const Teuchos::Ptr map, EDistribution distribution) const { using Teuchos::RCP; using Teuchos::rcp; @@ -68,11 +68,11 @@ namespace Amesos2 { const size_t local_num_contiguous_entries = (myRank == 0) ? t_mat->getGlobalNumRows() : 0; //create maps - typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; - RCP contiguousRowMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousColMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousDomainMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); - RCP contiguousRangeMap = rcp( new contiguous_map_type(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + //typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; + RCP contiguousRowMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousColMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousDomainMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); + RCP contiguousRangeMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) ); RCP contiguous_t_mat = rcp( new matrix_t(contiguousRowMap, contiguousColMap, local_matrix) ); contiguous_t_mat->resumeFill(); @@ -84,6 +84,93 @@ namespace Amesos2 { return rcp (new ConcreteMatrixAdapter (t_mat)); } + + + template + Teuchos::RCP > > + ConcreteMatrixAdapter< + Tpetra::CrsMatrix + >::reindex_impl(Teuchos::RCP &contigRowMap, + Teuchos::RCP &contigColMap) const + { + typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type; + auto rowMap = this->mat_->getRowMap(); + auto colMap = this->mat_->getColMap(); + auto rowComm = rowMap->getComm(); + auto colComm = colMap->getComm(); + +#ifdef HAVE_AMESOS2_TIMERS + auto reindexTimer = Teuchos::TimeMonitor::getNewTimer("Time to re-index matrix gids"); + Teuchos::TimeMonitor ReindexTimer(*reindexTimer); +#endif + + global_ordinal_t indexBase = rowMap->getIndexBase(); + global_ordinal_t numDoFs = this->mat_->getGlobalNumRows(); + local_ordinal_t nRows = this->mat_->getLocalNumRows(); + local_ordinal_t nCols = colMap->getLocalNumElements(); + + RCP contiguous_t_mat; + // if-checks when to recompute contigRowMap & contigColMap + // TODO: this is currentlly based on the global matrix dimesions + if (contigRowMap->getGlobalNumElements() != numDoFs || contigColMap->getGlobalNumElements() != numDoFs) { + auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm)); + global_ordinal_t frow = tmpMap->getMinGlobalIndex(); + + // Create new GID list for RowMap + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; + Kokkos::View rowIndexList ("indexList", nRows); + for (local_ordinal_t k = 0; k < nRows; k++) { + rowIndexList(k) = frow+k; + } + // Create new GID list for ColMap + Kokkos::View colIndexList ("indexList", nCols); + typedef Tpetra::MultiVector gid_mv_t; + Teuchos::ArrayView rowIndexArray(rowIndexList.data(), nRows); + Teuchos::ArrayView colIndexArray(colIndexList.data(), nCols); + gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1); + gid_mv_t col_mv (colMap, colIndexArray, nCols, 1); + typedef Tpetra::Import import_t; + RCP importer = rcp (new import_t (rowMap, colMap)); + col_mv.doImport (row_mv, *importer, Tpetra::INSERT); + { + auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly); + for(int i=0; imat_->getLocalMatrixDevice(); + contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix)); + } else { + // Build Matrix with contiguous Maps + auto lclMatrix = this->mat_->getLocalMatrixDevice(); + auto importer = this->mat_->getCrsGraph()->getImporter(); + auto exporter = this->mat_->getCrsGraph()->getExporter(); + contiguous_t_mat = rcp( new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter)); + } + return rcp (new ConcreteMatrixAdapter (contiguous_t_mat)); + } + + template + void + ConcreteMatrixAdapter< + Tpetra::CrsMatrix + >::describe (Teuchos::FancyOStream& os, + const Teuchos::EVerbosityLevel verbLevel) const + { + this->mat_->describe(os, verbLevel); + } } // end namespace Amesos2 #endif // AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp index 9eb848429c07..a2313e10e1a1 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_decl.hpp @@ -146,6 +146,7 @@ namespace Amesos2 { // different (cf subclasses of Tpetra::CrsMatrix), this method // hands off implementation to the adapter for the subclass RCP get_impl(const Teuchos::Ptr > map, EDistribution distribution = ROOTED) const; + RCP reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const; template void getSparseRowPtr_kokkos_view(KV & view) const { diff --git a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp index ec6cc767f8da..9b8a0ec16d68 100644 --- a/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp +++ b/packages/amesos2/src/Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp @@ -329,6 +329,21 @@ namespace Amesos2 { #endif } + + template + RCP > + AbstractConcreteMatrixAdapter< + Tpetra::RowMatrix, DerivedMat + >::reindex_impl(Teuchos::RCP> &contigRowMap, Teuchos::RCP> &contigColMap) const + { +#ifdef __CUDACC__ + // NVCC doesn't seem to like the static_cast, even though it is valid + return dynamic_cast*>(this)->reindex_impl(contigRowMap, contigColMap); +#else + return static_cast*>(this)->reindex_impl(contigRowMap, contigColMap); +#endif + } + } // end namespace Amesos2 #endif // AMESOS2_TPETRAROWMATRIX_ABSTRACTMATRIXADAPTER_DEF_HPP diff --git a/packages/framework/pr_tools/PullRequestLinuxDriver.sh b/packages/framework/pr_tools/PullRequestLinuxDriver.sh index d491f2dfd7bc..4198a87db296 100755 --- a/packages/framework/pr_tools/PullRequestLinuxDriver.sh +++ b/packages/framework/pr_tools/PullRequestLinuxDriver.sh @@ -274,6 +274,11 @@ then test_cmd_options+=( "--use-explicit-cachefile ") fi +if [[ ${GENCONFIG_BUILD_NAME} == *"framework"* ]] +then + test_cmd_options+=( "--skip-create-packageenables ") +fi + test_cmd="${PYTHON_EXE:?} ${REPO_ROOT:?}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py ${test_cmd_options[@]}" # Call the script to launch the tests diff --git a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py index 9bcefc2c5691..54e0e8bd8731 100755 --- a/packages/framework/pr_tools/PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/PullRequestLinuxDriverTest.py @@ -188,6 +188,14 @@ def parse_args(): default=default_filename_packageenables, help="{} Default={}".format(desc_package_enables, default_filename_packageenables)) + optional.add_argument('--skip-create-packageenables', + dest="skip_create_packageenables", + action="store_true", + default=False, + help="Skip the creation of the packageEnables.cmake fragment file generated by " + \ + "the TriBITS infrastructure indicating which packages are to be enabled based on file " + \ + "changes between a source and target branch. Default=False") + desc_subprojects_file = "The subprojects_file is used by the testing infrastructure. This parameter " + \ "allows the default, generated file, to be overridden. Generally this should " + \ "not be changed from the defaults." diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py index 3b585ad6eca6..9587b1f5fa97 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py @@ -278,6 +278,13 @@ def arg_filename_packageenables(self): """ return self.args.filename_packageenables + @property + def arg_skip_create_packageenables(self): + """ + This property controls whether the creation of a packageEnables.cmake fragment file + should be skipped. + """ + return self.args.skip_create_packageenables @property def arg_workspace_dir(self): @@ -634,7 +641,6 @@ def create_package_enables_file(self, dryrun=False): job_name = self.arg_pr_jenkins_job_name enable_map_entry = self.get_multi_property_from_config("ENABLE_MAP", job_name, delimeter=" ") - # Generate files using ATDM/TriBiTS Scripts if enable_map_entry is None: cmd = [os.path.join( self.arg_workspace_dir, @@ -745,6 +751,7 @@ def prepare_test(self): self.message("--- arg_ctest_driver = {}".format(self.arg_ctest_driver)) self.message("--- arg_ctest_drop_site = {}".format(self.arg_ctest_drop_site)) self.message("--- arg_ccache_enable = {}".format(self.arg_ccache_enable)) + self.message("--- arg_skip_create_packageenables = {}".format(self.arg_skip_create_packageenables)) self.message("") self.message("--- concurrency_build = {}".format(self.concurrency_build)) self.message("--- concurrency_test = {}".format(self.concurrency_test)) @@ -811,16 +818,22 @@ def prepare_test(self): self.message("| E N V I R O N M E N T S E T U P C O M P L E T E") self.message("+" + "-"*68 + "+") - self.message("+" + "-"*68 + "+") - self.message("| G e n e r a t e `packageEnables.cmake` S T A R T I N G") - self.message("+" + "-"*68 + "+") + if self.arg_skip_create_packageenables: + self.message("+" + "-"*68 + "+") + self.message("| S K I P P I N G `packageEnables.cmake` G E N E R A T I O N") + self.message("+" + "-"*68 + "+") - self.create_package_enables_file(dryrun=self.args.dry_run) + else: + self.message("+" + "-"*68 + "+") + self.message("| G e n e r a t e `packageEnables.cmake` S T A R T I N G") + self.message("+" + "-"*68 + "+") - self.message("+" + "-"*68 + "+") - self.message("| G e n e r a t e `packageEnables.cmake` C O M P L E T E D") - self.message("+" + "-"*68 + "+") - self.message("") + self.create_package_enables_file(dryrun=self.args.dry_run) + + self.message("+" + "-"*68 + "+") + self.message("| G e n e r a t e `packageEnables.cmake` C O M P L E T E D") + self.message("+" + "-"*68 + "+") + self.message("") return 0 diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py index 1f8038c6eaf5..d585aa7a0fb3 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py @@ -54,6 +54,15 @@ def execute_test(self): if not self.args.dry_run: gc.write_cmake_fragment() + if self.arg_skip_create_packageenables: + print("Optional --skip_create_packageenables found. " + + "Creating dummy packageEnables.cmake and package_subproject_list.cmake " + + "for CTest drivers.") + with open(self.arg_filename_packageenables, 'w'): + pass + with open(self.arg_filename_subprojects, 'w'): + pass + # Execute the call to ctest. verbosity_flag = "-VV" if "BUILD_NUMBER" in os.environ: diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py index 0d48f8d5b190..d1772481534a 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py @@ -225,6 +225,7 @@ def dummy_args(self): ctest_drop_site="testing.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, @@ -682,6 +683,21 @@ def test_TrilinosPRConfigurationBase_prepare_test(self): self.assertEqual(ret, 0) + def test_TrilinosPRConfigurationBase_prepare_test_skip_create_package_enables_file(self): + """ + Test that the prepare_test method does not call the member function create_package_enables_file + when skip_create_packageenables is True + """ + args = self.dummy_args() + args.skip_create_packageenables = True + pr_config = trilinosprhelpers.TrilinosPRConfigurationBase(args) + + pr_config.create_package_enables_file = Mock() + pr_config.prepare_test() + + pr_config.create_package_enables_file.assert_not_called() + + def test_TrilinosPRConfigurationBase_prepare_test_FAIL(self): """ Test the prepare_test method where it would fail due to diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py index 4eac6b0ceeda..551a57aff301 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationInstallation.py @@ -168,6 +168,7 @@ def dummy_args(self): ctest_drop_site="testint.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py index 9a722b30cce8..47586711a32d 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationStandard.py @@ -167,6 +167,7 @@ def dummy_args(self): ctest_drop_site="testing.sandia.gov", filename_packageenables="../packageEnables.cmake", filename_subprojects="../package_subproject_list.cmake", + skip_create_packageenables=False, mode="standard", req_mem_per_core=3.0, max_cores_allowed=12, diff --git a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py index c02193f18a28..46cf59176cd8 100755 --- a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py @@ -80,6 +80,7 @@ def setUp(self): workspace_dir='/dev/null/Trilinos_clone', filename_packageenables='../packageEnables.cmake', filename_subprojects='../package_subproject_list.cmake', + skip_create_packageenables=False, test_mode='standard', req_mem_per_core=3.0, max_cores_allowed=12, diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp new file mode 100644 index 000000000000..d70534974a00 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_BoundaryDetection.hpp @@ -0,0 +1,231 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_BOUNDARYDETECTION_HPP +#define MUELU_BOUNDARYDETECTION_HPP + +#include +#include +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "MueLu_LWGraph_kokkos.hpp" +#include "MueLu_Utilities.hpp" +#include "Teuchos_RCP.hpp" +#include "Xpetra_ConfigDefs.hpp" +#include "Xpetra_CrsGraph.hpp" +#include "Xpetra_MultiVector.hpp" + +namespace MueLu::BoundaryDetection { + +/*! + @class PointDirichletFunctor + @brief Functor for marking nodes as Dirichlet. + + A row is marked as Dirichlet boundary if fewer than dirichletNonzeroThreshold entries are larger in absolute value than dirichletThreshold. + It is assumed that boundaryNodes was initialized to false. +*/ +template +class PointDirichletFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + magnitudeType dirichletThreshold; + local_ordinal_type dirichletNonzeroThreshold; + + public: + PointDirichletFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType dirichletThreshold_, local_ordinal_type dirichletNonzeroThreshold_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , dirichletThreshold(dirichletThreshold_) + , dirichletNonzeroThreshold(dirichletNonzeroThreshold_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + local_ordinal_type nnz = 0; + for (local_ordinal_type k = 0; k < row.length; ++k) { + local_ordinal_type clid = row.colidx(k); + scalar_type val = row.value(k); + if ((rlid != static_cast(clid)) && (ATS::magnitude(val) > dirichletThreshold)) { + ++nnz; + if (nnz == dirichletNonzeroThreshold) { + return; + } + } + } + boundaryNodes(rlid) = true; + } +}; + +/*! + @class VectorDirichletFunctor + @brief Functor for marking nodes as Dirichlet in a block operator. + + Assumes a single fixed block size specified by blockSize. + Marks blocks as Dirichlet when one row is Dirichlet (useGreedyDirichlet==true) or when all rows are Dirichlet (useGreedyDirichlet==false). + A row is marked as Dirichlet boundary if fewer than dirichletNonzeroThreshold entries are larger in absolute value than dirichletThreshold. + It is assumed that boundaryNodes was initialized to false. +*/ +template +class VectorDirichletFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + local_ordinal_type blockSize; + boundary_nodes_view boundaryNodes; + magnitudeType dirichletThreshold; + local_ordinal_type dirichletNonzeroThreshold; + + public: + VectorDirichletFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, boundary_nodes_view boundaryNodes_, magnitudeType dirichletThreshold_, local_ordinal_type dirichletNonzeroThreshold_) + : A(A_) + , blockSize(blockSize_) + , boundaryNodes(boundaryNodes_) + , dirichletThreshold(dirichletThreshold_) + , dirichletNonzeroThreshold(dirichletNonzeroThreshold_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rblid) const { + for (local_ordinal_type rlid = rblid * blockSize; rlid < (rblid + 1) * blockSize; ++rlid) { + auto row = A.rowConst(rlid); + local_ordinal_type nnz = 0; + bool rowIsDirichlet = true; + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + auto val = row.value(k); + if ((rlid != static_cast(clid)) && (ATS::magnitude(val) > dirichletThreshold)) { + ++nnz; + if (nnz == dirichletNonzeroThreshold) { + rowIsDirichlet = false; + break; + } + } + } + if constexpr (useGreedyDirichlet) { + if (rowIsDirichlet) { + boundaryNodes(rblid) = true; + return; + } + } else { + if (!rowIsDirichlet) { + return; + } + } + } + if constexpr (!useGreedyDirichlet) + boundaryNodes(rblid) = true; + } +}; + +/*! + @class RowSumFunctor + @brief Functor for marking nodes as Dirichlet based on rowsum. + + A row is marked as Dirichlet boundary if the sum of off-diagonal values is smaller in absolute value than the diagonal multiplied by the threshold rowSumTol. + It is assumed that boundaryNodes was initialized to false. +*/ +template +class RowSumFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using magATS = Kokkos::ArithTraits; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + magnitudeType rowSumTol; + + public: + RowSumFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, magnitudeType rowSumTol_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , rowSumTol(rowSumTol_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + scalar_type rowsum = ATS::zero(); + scalar_type diagval = ATS::zero(); + auto row = A.rowConst(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + auto val = row.value(k); + if (rlid == static_cast(clid)) + diagval = val; + rowsum += val; + } + if (ATS::magnitude(rowsum) > ATS::magnitude(diagval) * rowSumTol) { + boundaryNodes(rlid) = true; + } + } +}; + +/*! + @class BoundaryFunctor + @brief Functor that serially applies sub-functors to rows. +*/ +template +class BoundaryFunctor { + private: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + + Functor functor; + BoundaryFunctor remainingFunctors; + + public: + BoundaryFunctor(local_matrix_type& A_, Functor& functor_, RemainingFunctors&... remainingFunctors_) + : functor(functor_) + , remainingFunctors(A_, remainingFunctors_...) {} + + KOKKOS_FUNCTION void operator()(const local_ordinal_type rlid) const { + functor(rlid); + remainingFunctors(rlid); + } +}; + +template +class BoundaryFunctor { + private: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + + local_matrix_type A; + Functor functor; + + public: + BoundaryFunctor(local_matrix_type& A_, Functor& functor_) + : A(A_) + , functor(functor_) {} + + KOKKOS_FUNCTION void operator()(const local_ordinal_type rlid) const { + functor(rlid); + } +}; + +} // namespace MueLu::BoundaryDetection + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp new file mode 100644 index 000000000000..1ba7039a5129 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_ClassicalDropping.hpp @@ -0,0 +1,203 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_CLASSICALDROPPING_HPP +#define MUELU_CLASSICALDROPPING_HPP + +#include "MueLu_DroppingCommon.hpp" +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Xpetra_Matrix.hpp" +#include "MueLu_Utilities.hpp" + +namespace MueLu::ClassicalDropping { + +/*! + @class SAFunctor + @brief Classical smoothed aggregation dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{|A_{ij}|^2}{|A_{ii}| |A_{jj}|} \le \theta^2 + \f] +*/ +template +class SAFunctor { + private: + using matrix_type = Xpetra::Matrix; + using diag_vec_type = Xpetra::MultiVector; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_view_type = typename Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + SAFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + auto val = row.value(k); + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +/*! + @class SignedRSFunctor + @brief Signed classical Ruge-Stueben dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{-\operatorname{Re}A_{ij}}{|A_{ii}|} \le \theta + \f] +*/ +template +class SignedRSFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + SignedRSFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + diagVec = Utilities::GetMatrixMaxMinusOffDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + auto neg_aij = -ATS::real(val); + auto max_neg_aik = eps * ATS::real(diag(rlid)); + results(offset + k) = Kokkos::max((neg_aij <= max_neg_aik) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +/*! + @class SignedSAFunctor + @brief Signed classical smoothed aggregation dropping criterion + + Evaluates the dropping criterion + \f[ + \frac{-\operatorname{sign}(A_{ij}) |A_{ij}|^2}{|A_{ii}| |A_{jj}|} \le \theta^2 + \f] +*/ +template +class SignedSAFunctor { + private: + using matrix_type = Xpetra::Matrix; + using diag_vec_type = Xpetra::MultiVector; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_view_type = typename Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using mATS = Kokkos::ArithTraits; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + magnitudeType eps; + results_view results; + + public: + SignedSAFunctor(matrix_type& A_, magnitudeType threshold, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , results(results_) { + // Construct ghosted matrix diagonal + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + auto val = row.value(k); + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + const bool is_nonpositive = ATS::real(val) <= mATS::zero(); + magnitudeType aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0 + if (is_nonpositive) + aij2 = -aij2; + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +} // namespace MueLu::ClassicalDropping + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index 6c8e857d6daf..d848a823743c 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -348,6 +348,10 @@ void CoalesceDropFactory::Build(Level } else GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos))) + TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0"); + Set(currentLevel, "Filtering", (threshold != STS::zero())); const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); @@ -439,7 +443,7 @@ void CoalesceDropFactory::Build(Level using MT = typename STS::magnitudeType; RCP ghostedDiag; ArrayRCP ghostedDiagVals; - ArrayRCP negMaxOffDiagonal; + ArrayRCP negMaxOffDiagonal; // RS style needs the max negative off-diagonal, SA style needs the diagonal if (useSignedClassicalRS) { if (ghostedBlockNumber.is_null()) { diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp index 25da3f2f96bc..e2c1e1abb34d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_decl.hpp @@ -20,7 +20,7 @@ #include "MueLu_AmalgamationInfo_fwd.hpp" #include "MueLu_Level_fwd.hpp" -#include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_LWGraph_kokkos_decl.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" #include "MueLu_Utilities_fwd.hpp" @@ -102,6 +102,8 @@ class CoalesceDropFactory_kokkos using node_type = Node; private: + using boundary_nodes_type = typename MueLu::LWGraph_kokkos::boundary_nodes_type; + // For compatibility #undef MUELU_COALESCEDROPFACTORY_KOKKOS_SHORT #include "MueLu_UseShortNames.hpp" @@ -128,6 +130,12 @@ class CoalesceDropFactory_kokkos //@} void Build(Level& currentLevel) const; + + std::tuple, RCP > GetBlockNumberMVs(Level& currentLevel) const; + + std::tuple BuildScalar(Level& currentLevel) const; + + std::tuple BuildVector(Level& currentLevel) const; }; } // namespace MueLu diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index 8696993bde33..3f9d157701d6 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -12,6 +12,7 @@ #include #include +#include #include "Xpetra_Matrix.hpp" @@ -23,435 +24,682 @@ #include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" -#include "MueLu_Utilities.hpp" + +// #define MUELU_COALESCE_DROP_DEBUG 1 + +#include "MueLu_BoundaryDetection.hpp" +#include "MueLu_ClassicalDropping.hpp" +#include "MueLu_CutDrop.hpp" +#include "MueLu_DroppingCommon.hpp" +#include "MueLu_DistanceLaplacianDropping.hpp" +#include "MueLu_MatrixConstruction.hpp" namespace MueLu { -namespace CoalesceDrop_Kokkos_Details { // anonymous +template +RCP CoalesceDropFactory_kokkos::GetValidParameterList() const { + RCP validParamList = rcp(new ParameterList()); + +#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) + SET_VALID_ENTRY("aggregation: drop tol"); + SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); + SET_VALID_ENTRY("aggregation: Dirichlet threshold"); + SET_VALID_ENTRY("aggregation: greedy Dirichlet"); + SET_VALID_ENTRY("aggregation: row sum drop tol"); + SET_VALID_ENTRY("aggregation: drop scheme"); + SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize"); + SET_VALID_ENTRY("aggregation: distance laplacian directional weights"); + SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); + SET_VALID_ENTRY("aggregation: distance laplacian algo"); + SET_VALID_ENTRY("aggregation: classical algo"); + SET_VALID_ENTRY("aggregation: coloring: localize color graph"); + + SET_VALID_ENTRY("filtered matrix: use lumping"); + SET_VALID_ENTRY("filtered matrix: reuse graph"); + SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); -template -class ScanFunctor { - public: - ScanFunctor(RowType rows_) - : rows(rows_) {} + SET_VALID_ENTRY("filtered matrix: use root stencil"); + SET_VALID_ENTRY("filtered matrix: use spread lumping"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor"); + SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap"); + SET_VALID_ENTRY("filtered matrix: Dirichlet threshold"); - KOKKOS_INLINE_FUNCTION - void operator()(const LO i, LO& upd, const bool& final) const { - upd += rows(i); - if (final) - rows(i) = upd; - } +#undef SET_VALID_ENTRY + validParamList->set("lightweight wrap", true, "Experimental option for lightweight graph access"); + + // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values) + validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical")))); + validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("default", "unscaled cut", "scaled cut", "scaled cut symmetric")))); + validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("default", "unscaled cut", "scaled cut", "scaled cut symmetric")))); + + validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); + validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); + validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + validParamList->set>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber"); + + return validParamList; +} - private: - RowType rows; -}; - -template -class ClassicalDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - magnitudeType eps; - - public: - ClassicalDropFunctor(GhostedViewType ghostedDiag, magnitudeType threshold) - : diag(ghostedDiag) - , eps(threshold) {} - - // Return true if we drop, false if not - KOKKOS_FORCEINLINE_FUNCTION - bool operator()(LO row, LO col, SC val) const { - // We avoid square root by using squared values - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 - - return (aij2 <= eps * eps * aiiajj); +template +void CoalesceDropFactory_kokkos::DeclareInput(Level& currentLevel) const { + Input(currentLevel, "A"); + Input(currentLevel, "UnAmalgamationInfo"); + + const ParameterList& pL = GetParameterList(); + std::string algo = pL.get("aggregation: drop scheme"); + if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + Input(currentLevel, "Coordinates"); } -}; - -template -class DistanceFunctor { - private: - typedef typename CoordsType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - typedef SC value_type; - - public: - DistanceFunctor(CoordsType coords_) - : coords(coords_) {} - - KOKKOS_INLINE_FUNCTION - magnitudeType distance2(LO row, LO col) const { - SC d = ATS::zero(), s; - for (size_t j = 0; j < coords.extent(1); j++) { - s = coords(row, j) - coords(col, j); - d += s * s; - } - return ATS::magnitude(d); + if (algo == "signed classical sa") + ; + else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) { + Input(currentLevel, "BlockNumber"); } +} - private: - CoordsType coords; -}; - -template -class DistanceLaplacianDropFunctor { - private: - typedef typename GhostedViewType::value_type SC; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - DistanceLaplacianDropFunctor(GhostedViewType ghostedLaplDiag, DistanceFunctor distFunctor_, magnitudeType threshold) - : diag(ghostedLaplDiag) - , distFunctor(distFunctor_) - , eps(threshold) {} - - // Return true if we drop, false if not - KOKKOS_INLINE_FUNCTION - bool operator()(LO row, LO col, SC /* val */) const { - // We avoid square root by using squared values - - // We ignore incoming value of val as we operate on an auxiliary - // distance Laplacian matrix - typedef typename DistanceFunctor::value_type dSC; - typedef Kokkos::ArithTraits dATS; - auto fval = dATS::one() / distFunctor.distance2(row, col); - - auto aiiajj = ATS::magnitude(diag(row, 0)) * ATS::magnitude(diag(col, 0)); // |a_ii|*|a_jj| - auto aij2 = ATS::magnitude(fval) * ATS::magnitude(fval); // |a_ij|^2 - - return (aij2 <= eps * eps * aiiajj); - } +template +void CoalesceDropFactory_kokkos:: + Build(Level& currentLevel) const { + auto A = Get>(currentLevel, "A"); + TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()"); + LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize(); - private: - GhostedViewType diag; // corresponds to overlapped diagonal multivector (2D View) - DistanceFunctor distFunctor; - magnitudeType eps; -}; - -template -class ScalarFunctor { - private: - typedef typename MatrixType::StaticCrsGraphType graph_type; - typedef typename graph_type::row_map_type rows_type; - typedef typename graph_type::entries_type cols_type; - typedef typename MatrixType::values_type vals_type; - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - typedef typename ATS::magnitudeType magnitudeType; - - public: - ScalarFunctor(MatrixType A_, BndViewType bndNodes_, DropFunctorType dropFunctor_, - typename rows_type::non_const_type rows_, - typename cols_type::non_const_type colsAux_, - typename vals_type::non_const_type valsAux_, - bool reuseGraph_, bool lumping_, SC /* threshold_ */, - bool aggregationMayCreateDirichlet_) - : A(A_) - , bndNodes(bndNodes_) - , dropFunctor(dropFunctor_) - , rows(rows_) - , colsAux(colsAux_) - , valsAux(valsAux_) - , reuseGraph(reuseGraph_) - , lumping(lumping_) - , aggregationMayCreateDirichlet(aggregationMayCreateDirichlet_) { - rowsA = A.graph.row_map; - zero = impl_ATS::zero(); - } + std::tuple results; + if (blkSize == 1) + results = BuildScalar(currentLevel); + else + results = BuildVector(currentLevel); - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& nnz) const { - auto rowView = A.rowConst(row); - auto length = rowView.length; - auto offset = rowsA(row); + if (GetVerbLevel() & Statistics1) { + GlobalOrdinal numDropped = std::get<0>(results); + auto boundaryNodes = std::get<1>(results); - impl_Scalar diag = zero; - LO rownnz = 0; - LO diagID = -1; - for (decltype(length) colID = 0; colID < length; colID++) { - LO col = rowView.colidx(colID); - impl_Scalar val = rowView.value(colID); + GO numLocalBoundaryNodes = 0; + GO numGlobalBoundaryNodes = 0; - if ((!bndNodes(row) && !dropFunctor(row, col, rowView.value(colID))) || row == col) { - colsAux(offset + rownnz) = col; + Kokkos::parallel_reduce( + "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), + KOKKOS_LAMBDA(const LO i, GO& n) { + if (boundaryNodes(i)) + n++; + }, + numLocalBoundaryNodes); - LO valID = (reuseGraph ? colID : rownnz); - valsAux(offset + valID) = val; - if (row == col) - diagID = valID; + auto comm = A->getRowMap()->getComm(); + MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - rownnz++; + GO numGlobalTotal = A->getGlobalNumEntries(); + GO numGlobalDropped; + MueLu_sumAll(comm, numDropped, numGlobalDropped); - } else { - // Rewrite with zeros (needed for reuseGraph) - valsAux(offset + colID) = zero; - diag += val; - } + GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; + if (numGlobalTotal != 0) { + GetOStream(Statistics1) << "Number of dropped entries: " + << numGlobalDropped << "/" << numGlobalTotal + << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; } - // How to assert on the device? - // assert(diagIndex != -1); - rows(row + 1) = rownnz; - // if (lumping && diagID != -1) { - if (lumping) { - // Add diag to the diagonal + } +} - // NOTE_KOKKOS: valsAux was allocated with - // ViewAllocateWithoutInitializing. This is not a problem here - // because we explicitly set this value above. - valsAux(offset + diagID) += diag; - } +template +std::tuple>, Teuchos::RCP>> CoalesceDropFactory_kokkos:: + GetBlockNumberMVs(Level& currentLevel) const { + RCP BlockNumber = Get>(currentLevel, "BlockNumber"); + RCP ghostedBlockNumber; + GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl; + + // Ghost the column block numbers if we need to + auto A = Get>(currentLevel, "A"); + RCP importer = A->getCrsGraph()->getImporter(); + if (!importer.is_null()) { + SubFactoryMonitor m1(*this, "Block Number import", currentLevel); + ghostedBlockNumber = Xpetra::VectorFactory::Build(importer->getTargetMap()); + ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT); + } else { + ghostedBlockNumber = BlockNumber; + } + return std::make_tuple(BlockNumber, ghostedBlockNumber); +} - // If the only element remaining after filtering is diagonal, mark node as boundary - // FIXME: this should really be replaced by the following - // if (indices.size() == 1 && indices[0] == row) - // boundaryNodes[row] = true; - // We do not do it this way now because there is no framework for distinguishing isolated - // and boundary nodes in the aggregation algorithms - bndNodes(row) |= (rownnz == 1 && aggregationMayCreateDirichlet); +template +std::tuple::boundary_nodes_type> CoalesceDropFactory_kokkos:: + BuildScalar(Level& currentLevel) const { + FactoryMonitor m(*this, "Build", currentLevel); + + using MatrixType = Xpetra::CrsMatrix; + using GraphType = Xpetra::CrsGraph; + using local_matrix_type = typename MatrixType::local_matrix_type; + using local_graph_type = typename GraphType::local_graph_type; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + using device_type = typename Node::device_type; + using memory_space = typename device_type::memory_space; + + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType MT; + const MT zero = Teuchos::ScalarTraits::zero(); + + auto A = Get>(currentLevel, "A"); - nnz += rownnz; + ////////////////////////////////////////////////////////////////////// + // Process parameterlist + const ParameterList& pL = GetParameterList(); + + // Boundary detection + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + const LocalOrdinal dirichletNonzeroThreshold = 1; + + // Dropping + const std::string algo = pL.get("aggregation: drop scheme"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + MT threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. + if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); + bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + // Fill + const bool lumping = pL.get("filtered matrix: use lumping"); + const bool reuseGraph = pL.get("filtered matrix: reuse graph"); + const bool reuseEigenvalue = pL.get("filtered matrix: reuse eigenvalue"); + + const bool useRootStencil = pL.get("filtered matrix: use root stencil"); + const bool useSpreadLumping = pL.get("filtered matrix: use spread lumping"); + TEUCHOS_ASSERT(!useRootStencil); + TEUCHOS_ASSERT(!useSpreadLumping); + + if (algo == "classical") + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + else if (algo == "distance laplacian") + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos))) + TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0"); + + // FIXME: Non-Kokkos implementation does this, but this seems unnecessary. + if (algo == "distance laplacian") + aggregationMayCreateDirichlet = true; + + ////////////////////////////////////////////////////////////////////// + // We perform four sweeps over the rows of A: + // Pass 1: detection of boundary nodes + // Pass 2: diagonal extraction + // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix + // Pass 4: fill of the filtered matrix + // + // Pass 1 and 3 apply a sequence of criteria to each row of the matrix. + + // TODO: We could merge pass 1 and 2. + + auto crsA = rcp_dynamic_cast(A, true)->getCrsMatrix(); + auto lclA = crsA->getLocalMatrixDevice(); + auto range = range_type(0, lclA.numRows()); + + ////////////////////////////////////////////////////////////////////// + // Pass 1: Detect boundary nodes + // + // The following criteria are available: + // - BoundaryDetection::PointDirichletFunctor + // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries + // - BoundaryDetection::RowSumFunctor + // Marks rows as Dirichlet bases on row-sum criterion + + // Dirichlet nodes + auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false + { + SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel); + + // macro that applies boundary detection functors +#define MueLu_runBoundaryFunctors(...) \ + { \ + auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \ + Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \ } - private: - MatrixType A; - BndViewType bndNodes; - DropFunctorType dropFunctor; - - rows_type rowsA; - - typename rows_type::non_const_type rows; - typename cols_type::non_const_type colsAux; - typename vals_type::non_const_type valsAux; - - bool reuseGraph; - bool lumping; - bool aggregationMayCreateDirichlet; - impl_Scalar zero; -}; - -// collect number nonzeros of blkSize rows in nnz_(row+1) -template -class Stage1aVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - public: - Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) - : kokkosMatrix(kokkosMatrix_) - , nnz(nnz_) - , blkSize(blkSize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const LO row, LO& totalnnz) const { - // the following code is more or less what MergeRows is doing - // count nonzero entries in all dof rows associated with node row - LO nodeRowMaxNonZeros = 0; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(row * blkSize + j); - nodeRowMaxNonZeros += rowView.length; + auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + + if (rowSumTol <= 0.) { + MueLu_runBoundaryFunctors(dirichlet_detection); + } else { + auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol); + MueLu_runBoundaryFunctors(dirichlet_detection, + apply_rowsum); } - nnz(row + 1) = nodeRowMaxNonZeros; - totalnnz += nodeRowMaxNonZeros; +#undef MueLu_runBoundaryFunctors } + // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true. + // Otherwise we're now done with it now. + + ////////////////////////////////////////////////////////////////////// + // Pass 2 & 3: Diagonal extraction and determine dropping and construct + // rowptr of filtered matrix + // + // The following criteria are available: + // - Misc::PointwiseDropBoundaryFunctor + // Drop all rows that have been marked as Dirichlet + // - Misc::DropOffRankFunctor + // Drop all entries that are off-rank + // - ClassicalDropping::SAFunctor + // Classical dropping + // - ClassicalDropping::SignedRSFunctor + // Classical RS dropping + // - ClassicalDropping::SignedSAFunctor + // Classical signed SA dropping + // - DistanceLaplacian::DropFunctor + // Distance Laplacian dropping + // - Misc::KeepDiagonalFunctor + // Mark diagonal as KEEP + // - Misc::MarkSingletonFunctor + // Mark singletons after dropping as Dirichlet + // - Misc::BlockDiagonalizeFunctor + // Drop coupling between blocks + // + // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms. + + // rowptr of filtered A + auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1); + // Number of nonzeros of filtered A + LocalOrdinal nnz_filtered = 0; + // dropping decisions for each entry + auto results = Kokkos::View("results", lclA.nnz()); // initialized to UNDECIDED + { + SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel); - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType nnz; //< View containing number of nonzeros for current row - blkSizeType blkSize; //< block size (or partial block size in strided maps) -}; - -// build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix -// sort column ids -// translate them into (unique) node ids -// count the node column ids per node row -template -class Stage1bcVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - - private: - MatrixType kokkosMatrix; //< local matrix part - NnzType coldofnnz; //< view containing start and stop indices for subviews - blkSizeType blkSize; //< block size (or partial block size in strided maps) - ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted) - Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id - NnzType colnodennz; //< view containing number of column nodes for each node row - BdryNodeTypeConst dirichletdof; //< view containing with num dofs booleans. True if dof (not necessarily entire node) is dirichlet boundardy dof. - BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node. - boolType usegreedydirichlet; //< boolean for use of greedy Dirichlet (if any dof is Dirichlet, entire node is dirichlet) default false (need all dofs in node to be Dirichlet for node to be Dirichlet) - - public: - Stage1bcVectorFunctor(MatrixType kokkosMatrix_, - NnzType coldofnnz_, - blkSizeType blkSize_, - ColDofType coldofs_, - Dof2NodeTranslationType dof2node_, - NnzType colnodennz_, - BdryNodeTypeConst dirichletdof_, - BdryNodeType bdrynode_, - boolType usegreedydirichlet_) - : kokkosMatrix(kokkosMatrix_) - , coldofnnz(coldofnnz_) - , blkSize(blkSize_) - , coldofs(coldofs_) - , dof2node(dof2node_) - , colnodennz(colnodennz_) - , dirichletdof(dirichletdof_) - , bdrynode(bdrynode_) - , usegreedydirichlet(usegreedydirichlet_) { - } + std::string functorLabel = "MueLu::CoalesceDrop::CountEntries"; - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode, LO& nnz) const { - LO pos = coldofnnz(rowNode); - if (usegreedydirichlet) { - bdrynode(rowNode) = false; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is Dirichlet - if (dirichletdof(rowNode * blkSize + j)) - bdrynode(rowNode) = true; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos++; + // macro that applied dropping functors +#if !defined(HAVE_MUELU_DEBUG) +#define MueLu_runDroppingFunctors(...) \ + { \ + auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \ + } +#else +#define MueLu_runDroppingFunctors(...) \ + { \ + auto debug = Misc::DebugFunctor(lclA, results); \ + auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \ + } +#endif + + auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results); + + if (threshold != zero) { + auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results); + auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results); + + if (algo == "classical" || algo == "block diagonal classical") { + if (algo == "block diagonal classical") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(block_diagonalize, + classical_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(block_diagonalize, + classical_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + MueLu_runDroppingFunctors(symmetrize); + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); + } + } else { + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(classical_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(classical_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (classicalAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledComparison(*A, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + MueLu_runDroppingFunctors(symmetrize); + + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); + } } - } - } else { - bdrynode(rowNode) = true; - for (LO j = 0; j < blkSize; j++) { - auto rowView = kokkosMatrix.row(rowNode * blkSize + j); - auto numIndices = rowView.length; - - // if any dof in the node is not Dirichlet - if (dirichletdof(rowNode * blkSize + j) == false) - bdrynode(rowNode) = false; - - for (decltype(numIndices) k = 0; k < numIndices; k++) { - auto dofID = rowView.colidx(k); - coldofs(pos) = dofID; - pos++; + } else if (algo == "signed classical" || algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") { + auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results); + + if (algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (classicalAlgoStr == "default") { + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(block_diagonalize, + signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + MueLu_runDroppingFunctors(block_diagonalize, + signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); + } + } else { + if (classicalAlgoStr == "default") { + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + MueLu_runDroppingFunctors(signed_classical_rs_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); + } } - } - } - - // sort coldofs - LO begin = coldofnnz(rowNode); - LO end = coldofnnz(rowNode + 1); - LO n = end - begin; - for (LO i = 0; i < (n - 1); i++) { - for (LO j = 0; j < (n - i - 1); j++) { - if (coldofs(j + begin) > coldofs(j + begin + 1)) { - LO temp = coldofs(j + begin); - coldofs(j + begin) = coldofs(j + begin + 1); - coldofs(j + begin + 1) = temp; + } else if (algo == "signed classical sa") { + if (classicalAlgoStr == "default") { + auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(signed_classical_sa_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + MueLu_runDroppingFunctors(signed_classical_sa_dropping, + drop_boundaries, + preserve_diagonals); + } + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\""); } + } else if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") { + using doubleMultiVector = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; + auto coords = Get>(currentLevel, "Coordinates"); + + auto dist2 = DistanceLaplacian::DistanceFunctor(*A, coords); + + if (algo == "block diagonal distance laplacian") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(block_diagonalize, + dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(block_diagonalize, + dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(block_diagonalize, + drop_boundaries, + cut_drop, + preserve_diagonals); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + MueLu_runDroppingFunctors(symmetrize); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); + } + } else { + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(dist_laplacian_dropping, + drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results); + auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); + + MueLu_runDroppingFunctors(drop_boundaries, + preserve_diagonals, + cut_drop); + + auto symmetrize = Misc::SymmetrizeFunctor(lclA, results); + + MueLu_runDroppingFunctors(symmetrize); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); + } + } + } else if (algo == "block diagonal") { + auto BlockNumbers = GetBlockNumberMVs(currentLevel); + auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results); + + MueLu_runDroppingFunctors(block_diagonalize); + } else { + TEUCHOS_ASSERT(false); } + } else { + Kokkos::deep_copy(results, KEEP); + // FIXME: This seems inconsistent + // MueLu_runDroppingFunctors(drop_boundaries); + auto no_op = Misc::NoOpFunctor(); + MueLu_runDroppingFunctors(no_op); } - size_t cnt = 0; - LO lastNodeID = -1; - for (LO i = 0; i < n; i++) { - LO dofID = coldofs(begin + i); - LO nodeID = dof2node(dofID); - if (nodeID != lastNodeID) { - lastNodeID = nodeID; - coldofs(begin + cnt) = nodeID; - cnt++; - } - } - colnodennz(rowNode + 1) = cnt; - nnz += cnt; - } -}; - -// fill column node id view -template -class Stage1dVectorFunctor { - private: - typedef typename MatrixType::ordinal_type LO; - typedef typename MatrixType::value_type SC; - - private: - ColDofType coldofs; //< view containing mixed node and dof indices (only input) - ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs) - ColNodeType colnodes; //< view containing the local node ids associated with columns - ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews - - public: - Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) - : coldofs(coldofs_) - , coldofnnz(coldofnnz_) - , colnodes(colnodes_) - , colnodennz(colnodennz_) { +#undef MueLu_runDroppingFunctors } + GO numDropped = lclA.nnz() - nnz_filtered; + // We now know the number of entries of filtered A and have the final rowptr. + + ////////////////////////////////////////////////////////////////////// + // Pass 4: Create local matrix for filtered A + // + // Dropped entries are optionally lumped to the diagonal. - KOKKOS_INLINE_FUNCTION - void operator()(const LO rowNode) const { - auto dofbegin = coldofnnz(rowNode); - auto nodebegin = colnodennz(rowNode); - auto nodeend = colnodennz(rowNode + 1); - auto n = nodeend - nodebegin; + RCP filteredA; + RCP graph; + { + SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel); - for (decltype(nodebegin) i = 0; i < n; i++) { - colnodes(nodebegin + i) = coldofs(dofbegin + i); + local_matrix_type lclFilteredA; + local_graph_type lclGraph; + if (reuseGraph) { + filteredA = MatrixFactory::BuildCopy(A); + lclFilteredA = filteredA->getLocalMatrixDevice(); + + auto colidx = entries_type("entries", nnz_filtered); + lclGraph = local_graph_type(colidx, filtered_rowptr); + } else { + auto colidx = entries_type("entries", nnz_filtered); + auto values = values_type("values", nnz_filtered); + lclFilteredA = local_matrix_type("filteredA", + lclA.numRows(), lclA.numCols(), + nnz_filtered, + values, filtered_rowptr, colidx); } - } -}; -} // namespace CoalesceDrop_Kokkos_Details + if (lumping) { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor(lclA, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor(lclA, results, lclFilteredA); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor); + } + } else { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor(lclA, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor(lclA, results, lclFilteredA); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor); + } + } -template -RCP CoalesceDropFactory_kokkos::GetValidParameterList() const { - RCP validParamList = rcp(new ParameterList()); + if (!reuseGraph) + filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()); + filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); -#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) - SET_VALID_ENTRY("aggregation: drop tol"); - SET_VALID_ENTRY("aggregation: Dirichlet threshold"); - SET_VALID_ENTRY("aggregation: drop scheme"); - SET_VALID_ENTRY("aggregation: dropping may create Dirichlet"); - SET_VALID_ENTRY("aggregation: greedy Dirichlet"); - SET_VALID_ENTRY("filtered matrix: use lumping"); - SET_VALID_ENTRY("filtered matrix: reuse graph"); - SET_VALID_ENTRY("filtered matrix: reuse eigenvalue"); - SET_VALID_ENTRY("aggregation: use ml scaling of drop tol"); - { - validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple("classical", "distance laplacian")))); - } -#undef SET_VALID_ENTRY - validParamList->set>("A", Teuchos::null, "Generating factory of the matrix A"); - validParamList->set>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo"); - validParamList->set>("Coordinates", Teuchos::null, "Generating factory for Coordinates"); + if (reuseEigenvalue) { + // Reuse max eigenvalue from A + // It is unclear what eigenvalue is the best for the smoothing, but we already may have + // the D^{-1}A estimate in A, may as well use it. + // NOTE: ML does that too + filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate()); + } else { + filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); + } - return validParamList; -} + if (!reuseGraph) { + // Use graph of filteredA as graph. + lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice(); + } + graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A")); + graph->SetBoundaryNodeMap(boundaryNodes); + } -template -void CoalesceDropFactory_kokkos::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "UnAmalgamationInfo"); + LO dofsPerNode = 1; + Set(currentLevel, "DofsPerNode", dofsPerNode); + Set(currentLevel, "Graph", graph); + Set(currentLevel, "A", filteredA); - const ParameterList& pL = GetParameterList(); - if (pL.get("aggregation: drop scheme") == "distance laplacian") - Input(currentLevel, "Coordinates"); + return std::make_tuple(numDropped, boundaryNodes); } template -void CoalesceDropFactory_kokkos:: - Build(Level& currentLevel) const { +std::tuple::boundary_nodes_type> CoalesceDropFactory_kokkos:: + BuildVector(Level& currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); + using MatrixType = Xpetra::CrsMatrix; + using GraphType = Xpetra::CrsGraph; + using local_matrix_type = typename MatrixType::local_matrix_type; + using local_graph_type = typename GraphType::local_graph_type; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + using device_type = typename Node::device_type; + using memory_space = typename device_type::memory_space; + typedef Teuchos::ScalarTraits STS; typedef typename STS::magnitudeType MT; const MT zero = Teuchos::ScalarTraits::zero(); @@ -480,276 +728,339 @@ void CoalesceDropFactory_kokkos:: auto amalInfo = Get>(currentLevel, "UnAmalgamationInfo"); - const ParameterList& pL = GetParameterList(); - - // Sanity Checking: ML drop tol scaling is not supported in UncoupledAggregation_Kokkos - TEUCHOS_TEST_FOR_EXCEPTION(pL.get("aggregation: use ml scaling of drop tol"), std::invalid_argument, "Option: 'aggregation: use ml scaling of drop tol' is not supported in the Kokkos version of CoalesceDroPFactory"); - - std::string algo = pL.get("aggregation: drop scheme"); - - double threshold = pL.get("aggregation: drop tol"); - GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold - << ", blocksize = " << A->GetFixedBlockSize() << std::endl; - - const typename STS::magnitudeType dirichletThreshold = - STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); - - GO numDropped = 0, numTotal = 0; - - RCP graph; - LO dofsPerNode = -1; - - typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type; - boundary_nodes_type boundaryNodes; - - RCP filteredA; - if (blkSize == 1 && threshold == zero) { - // Scalar problem without dropping - - // Detect and record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - // Trivial LWGraph construction - graph = rcp(new LWGraph_kokkos(A->getCrsGraph()->getLocalGraphDevice(), A->getRowMap(), A->getColMap(), "graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); - - numTotal = A->getLocalNumEntries(); - dofsPerNode = 1; - - filteredA = A; - - } else if (blkSize == 1 && threshold != zero) { - // Scalar problem with dropping - - // Detect and record rows that correspond to Dirichlet boundary conditions - boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - typedef typename Matrix::local_matrix_type local_matrix_type; - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; - typedef typename kokkos_graph_type::entries_type::non_const_type cols_type; - typedef typename local_matrix_type::values_type::non_const_type vals_type; - - LO numRows = A->getLocalNumRows(); - local_matrix_type kokkosMatrix = A->getLocalMatrixDevice(); - auto nnzA = kokkosMatrix.nnz(); - auto rowsA = kokkosMatrix.graph.row_map; - - typedef Kokkos::ArithTraits ATS; - typedef typename ATS::val_type impl_Scalar; - typedef Kokkos::ArithTraits impl_ATS; - - bool reuseGraph = pL.get("filtered matrix: reuse graph"); - bool lumping = pL.get("filtered matrix: use lumping"); - if (lumping) - GetOStream(Runtime0) << "Lumping dropped entries" << std::endl; + const RCP rowMap = A->getRowMap(); + const RCP colMap = A->getColMap(); + + // build a node row map (uniqueMap = non-overlapping) and a node column map + // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation + // stored in the AmalgamationInfo class container contain the local node id + // given a local dof id. The data is calculated in the AmalgamationFactory and + // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) + const RCP uniqueMap = amalInfo->getNodeRowMap(); + const RCP nonUniqueMap = amalInfo->getNodeColMap(); + Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? + Array colTranslationArray = *(amalInfo->getColTranslation()); + + Kokkos::View + rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); + Kokkos::View + colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); + + // get number of local nodes + LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); + typedef typename Kokkos::View id_translation_type; + id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); + id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); + Kokkos::deep_copy(rowTranslation, rowTranslationView); + Kokkos::deep_copy(colTranslation, colTranslationView); + + // extract striding information + blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) + LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map + LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) + if (A->IsView("stridedMaps") == true) { + const RCP myMap = A->getRowMap("stridedMaps"); + const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); + TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); + blkSize = Teuchos::as(strMap->getFixedBlockSize()); + blkId = strMap->getStridedBlockId(); + if (blkId > -1) + blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); + } - const bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize); - // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + setting a single value - rows_type rows("FA_rows", numRows + 1); - cols_type colsAux(Kokkos::ViewAllocateWithoutInitializing("FA_aux_cols"), nnzA); - vals_type valsAux; - if (reuseGraph) { - SubFactoryMonitor m2(*this, "CopyMatrix", currentLevel); - - // Share graph with the original matrix - filteredA = MatrixFactory::Build(A->getCrsGraph()); + ////////////////////////////////////////////////////////////////////// + // Process parameterlist + const ParameterList& pL = GetParameterList(); - // Do a no-op fill-complete - RCP fillCompleteParams(new ParameterList); - fillCompleteParams->set("No Nonlocal Changes", true); - filteredA->fillComplete(fillCompleteParams); + // Boundary detection + const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as(pL.get("aggregation: Dirichlet threshold"))); + const typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); + const LocalOrdinal dirichletNonzeroThreshold = 1; + const bool useGreedyDirichlet = pL.get("aggregation: greedy Dirichlet"); + TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems."); + + // Dropping + const std::string algo = pL.get("aggregation: drop scheme"); + std::string classicalAlgoStr = pL.get("aggregation: classical algo"); + std::string distanceLaplacianAlgoStr = pL.get("aggregation: distance laplacian algo"); + MT threshold; + // If we're doing the ML-style halving of the drop tol at each level, we do that here. + if (pL.get("aggregation: use ml scaling of drop tol")) + threshold = pL.get("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID()); + else + threshold = as(pL.get("aggregation: drop tol")); + bool aggregationMayCreateDirichlet = pL.get("aggregation: dropping may create Dirichlet"); + + // Fill + const bool lumping = pL.get("filtered matrix: use lumping"); + const bool reuseGraph = pL.get("filtered matrix: reuse graph"); + const bool reuseEigenvalue = pL.get("filtered matrix: reuse eigenvalue"); + + const bool useRootStencil = pL.get("filtered matrix: use root stencil"); + const bool useSpreadLumping = pL.get("filtered matrix: use spread lumping"); + TEUCHOS_ASSERT(!useRootStencil); + TEUCHOS_ASSERT(!useSpreadLumping); + + if (algo == "classical") { + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else if (algo == "distance laplacian") { + GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + } else + GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl; + + // FIXME: Non-Kokkos implementation does this, but this seems unnecessary. + if (algo == "distance laplacian") + aggregationMayCreateDirichlet = true; + + ////////////////////////////////////////////////////////////////////// + // We perform four sweeps over the rows of A: + // Pass 1: detection of boundary nodes + // Pass 2: diagonal extraction + // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix + // Pass 4: fill of the filtered matrix + // + // Pass 1 and 3 apply a sequence of criteria to each row of the matrix. + + // TODO: We could merge pass 1 and 2. + + auto crsA = rcp_dynamic_cast(A, true)->getCrsMatrix(); + auto lclA = crsA->getLocalMatrixDevice(); + auto range = range_type(0, numNodes); + + ////////////////////////////////////////////////////////////////////// + // Pass 1: Detect boundary nodes + // + // The following criteria are available: + // - BoundaryDetection::VectorDirichletFunctor + // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries + + // Dirichlet nodes + auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false + { + SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel); - // No need to reuseFill, just modify in place - valsAux = filteredA->getLocalMatrixDevice().values; +#define MueLu_runBoundaryFunctors(...) \ + { \ + auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \ + Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \ + } + if (useGreedyDirichlet) { + auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + MueLu_runBoundaryFunctors(dirichlet_detection); } else { - // Need an extra array to compress - valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); + auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold); + MueLu_runBoundaryFunctors(dirichlet_detection); } +#undef MueLu_runBoundaryFunctors + } + // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true. + // Otherwise we're now done with it now. + + ////////////////////////////////////////////////////////////////////// + // Pass 2 & 3: Diagonal extraction and determine dropping and construct + // rowptr of filtered matrix + // + // The following criteria are available: + // - Misc::VectorDropBoundaryFunctor + // Drop all rows that have been marked as Dirichlet + // - Misc::DropOffRankFunctor + // Drop all entries that are off-rank + // - ClassicalDropping::SAFunctor + // Classical dropping + // - ClassicalDropping::SignedRSFunctor + // Classical RS dropping + // - ClassicalDropping::SignedSAFunctor + // Classical signed SA dropping + // - DistanceLaplacian::DropFunctor + // Distance Laplacian dropping + // - Misc::KeepDiagonalFunctor + // Mark diagonal as KEEP + // - Misc::MarkSingletonFunctor + // Mark singletons after dropping as Dirichlet + + // rowptr of filtered A + auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1); + auto graph_rowptr = rowptr_type("rowptr", numNodes + 1); + // Number of nonzeros of filtered A and graph + Kokkos::pair nnz = {0, 0}; + + // dropping decisions for each entry + auto results = Kokkos::View("results", lclA.nnz()); // initialized to UNDECIDED + { + SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel); - LO nnzFA = 0; - { - if (algo == "classical") { - // Construct overlapped matrix diagonal - RCP ghostedDiag; - { - kokkosMatrix = local_matrix_type(); - SubFactoryMonitor m2(*this, "Ghosted diag construction", currentLevel); - ghostedDiag = Utilities::GetMatrixOverlappedDiagonal(*A); - kokkosMatrix = A->getLocalMatrixDevice(); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); + std::string functorLabel = "MueLu::CoalesceDrop::CountEntries"; - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), - scalarFunctor, nnzFA); - } +#if !defined(HAVE_MUELU_DEBUG) +#define MueLu_runDroppingFunctors(...) \ + { \ + auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \ + } +#else +#define MueLu_runDroppingFunctors(...) \ + { \ + auto debug = Misc::DebugFunctor(lclA, results); \ + auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \ + Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \ + } +#endif - } else if (algo == "distance laplacian") { - typedef Xpetra::MultiVector::magnitudeType, LO, GO, NO> doubleMultiVector; - auto coords = Get>(currentLevel, "Coordinates"); + auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results); - auto uniqueMap = A->getRowMap(); - auto nonUniqueMap = A->getColMap(); + if (threshold != zero) { + auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results); + auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results); - // Construct ghosted coordinates - RCP importer; - { - SubFactoryMonitor m2(*this, "Coords Import construction", currentLevel); - importer = ImportFactory::Build(uniqueMap, nonUniqueMap); + if (algo == "classical") { + if (classicalAlgoStr == "default") { + auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(classical_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(classical_dropping, + // drop_boundaries, + preserve_diagonals); + } + } else if (classicalAlgoStr == "unscaled cut") { + TEUCHOS_ASSERT(false); + } else if (classicalAlgoStr == "scaled cut") { + TEUCHOS_ASSERT(false); + } else if (classicalAlgoStr == "scaled cut symmetric") { + TEUCHOS_ASSERT(false); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\""); } - RCP ghostedCoords; - { - SubFactoryMonitor m2(*this, "Ghosted coords construction", currentLevel); - ghostedCoords = Xpetra::MultiVectorFactory::magnitudeType, LO, GO, NO>::Build(nonUniqueMap, coords->getNumVectors()); - ghostedCoords->doImport(*coords, *importer, Xpetra::INSERT); + } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") { + auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(signed_classical_rs_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + MueLu_runDroppingFunctors(signed_classical_rs_dropping, + // drop_boundaries, + preserve_diagonals); } - - auto ghostedCoordsView = ghostedCoords->getDeviceLocalView(Xpetra::Access::ReadWrite); - CoalesceDrop_Kokkos_Details::DistanceFunctor distFunctor(ghostedCoordsView); - - // Construct Laplacian diagonal - RCP localLaplDiag; - { - SubFactoryMonitor m2(*this, "Local Laplacian diag construction", currentLevel); - - localLaplDiag = VectorFactory::Build(uniqueMap); - - auto localLaplDiagView = localLaplDiag->getDeviceLocalView(Xpetra::Access::OverwriteAll); - auto kokkosGraph = kokkosMatrix.graph; - - Kokkos::parallel_for( - "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", range_type(0, numRows), - KOKKOS_LAMBDA(const LO row) { - auto rowView = kokkosGraph.rowConst(row); - auto length = rowView.length; - - impl_Scalar d = impl_ATS::zero(); - for (decltype(length) colID = 0; colID < length; colID++) { - auto col = rowView(colID); - if (row != col) - d += impl_ATS::one() / distFunctor.distance2(row, col); - } - localLaplDiagView(row, 0) = d; - }); + } else if (algo == "signed classical sa") { + auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(signed_classical_sa_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + + } else { + MueLu_runDroppingFunctors(signed_classical_sa_dropping, + // drop_boundaries, + preserve_diagonals); } - - // Construct ghosted Laplacian diagonal - RCP ghostedLaplDiag; - { - SubFactoryMonitor m2(*this, "Ghosted Laplacian diag construction", currentLevel); - ghostedLaplDiag = VectorFactory::Build(nonUniqueMap); - ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT); - } - - // Filter out entries - { - SubFactoryMonitor m2(*this, "MainLoop", currentLevel); - - auto ghostedLaplDiagView = ghostedLaplDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); - - CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor - dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); - - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), - scalarFunctor, nnzFA); + } else if (algo == "distance laplacian") { + using doubleMultiVector = Xpetra::MultiVector::magnitudeType, LO, GO, NO>; + auto coords = Get>(currentLevel, "Coordinates"); + + auto dist2 = DistanceLaplacian::DistanceFunctor(*A, coords); + + if (distanceLaplacianAlgoStr == "default") { + auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results); + + if (aggregationMayCreateDirichlet) { + MueLu_runDroppingFunctors(dist_laplacian_dropping, + // drop_boundaries, + preserve_diagonals, + mark_singletons_as_boundary); + } else { + MueLu_runDroppingFunctors(dist_laplacian_dropping, + // drop_boundaries, + preserve_diagonals); + } + } else if (distanceLaplacianAlgoStr == "unscaled cut") { + TEUCHOS_ASSERT(false); + } else if (distanceLaplacianAlgoStr == "scaled cut") { + TEUCHOS_ASSERT(false); + } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") { + TEUCHOS_ASSERT(false); + } else { + TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\""); } + } else { + TEUCHOS_ASSERT(false); } + } else { + Kokkos::deep_copy(results, KEEP); + // MueLu_runDroppingFunctors(drop_boundaries); + auto no_op = Misc::NoOpFunctor(); + MueLu_runDroppingFunctors(no_op); } - numDropped = nnzA - nnzFA; +#undef MueLu_runDroppingFunctors + } + LocalOrdinal nnz_filtered = nnz.first; + LocalOrdinal nnz_graph = nnz.second; + GO numTotal = lclA.nnz(); + GO numDropped = numTotal - nnz_filtered; + // We now know the number of entries of filtered A and have the final rowptr. - { - SubFactoryMonitor m2(*this, "CompressRows", currentLevel); - - // parallel_scan (exclusive) - Kokkos::parallel_scan( - "MueLu:CoalesceDropF:Build:scalar_filter:compress_rows", range_type(0, numRows + 1), - KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) { - update += rows(i); - if (final_pass) - rows(i) = update; - }); - } + ////////////////////////////////////////////////////////////////////// + // Pass 4: Create local matrix for filtered A + // + // Dropped entries are optionally lumped to the diagonal. + + RCP filteredA; + RCP graph; + { + SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel); - // Compress cols (and optionally vals) - // We use a trick here: we moved all remaining elements to the beginning - // of the original row in the main loop, so we don't need to check for - // INVALID here, and just stop when achieving the new number of elements - // per row. - cols_type cols(Kokkos::ViewAllocateWithoutInitializing("FA_cols"), nnzFA); - vals_type vals; + local_matrix_type lclFilteredA; if (reuseGraph) { - GetOStream(Runtime1) << "reuse matrix graph for filtering (compress matrix columns only)" << std::endl; - // Only compress cols - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - Kokkos::parallel_for( - "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), - KOKKOS_LAMBDA(const LO i) { - // Is there Kokkos memcpy? - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i + 1) - rows(i); - for (size_t j = 0; j < rownnz; j++) - cols(rowStart + j) = colsAux(rowAStart + j); - }); + lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols()); } else { - // Compress cols and vals - GetOStream(Runtime1) << "new matrix graph for filtering (compress matrix columns and values)" << std::endl; - SubFactoryMonitor m2(*this, "CompressColsAndVals", currentLevel); - - vals = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_vals"), nnzFA); - - Kokkos::parallel_for( - "MueLu:TentativePF:Build:compress_cols", range_type(0, numRows), - KOKKOS_LAMBDA(const LO i) { - LO rowStart = rows(i); - LO rowAStart = rowsA(i); - size_t rownnz = rows(i + 1) - rows(i); - for (size_t j = 0; j < rownnz; j++) { - cols(rowStart + j) = colsAux(rowAStart + j); - vals(rowStart + j) = valsAux(rowAStart + j); - } - }); + auto colidx = entries_type("entries", nnz_filtered); + auto values = values_type("values", nnz_filtered); + lclFilteredA = local_matrix_type("filteredA", + lclA.numRows(), lclA.numCols(), + nnz_filtered, + values, filtered_rowptr, colidx); } - kokkos_graph_type kokkosGraph(cols, rows); - + local_graph_type lclGraph; { - SubFactoryMonitor m2(*this, "LWGraph construction", currentLevel); - - graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A")); - graph->SetBoundaryNodeMap(boundaryNodes); + auto colidx = entries_type("entries", nnz_graph); + lclGraph = local_graph_type(colidx, graph_rowptr); } - numTotal = A->getLocalNumEntries(); - - dofsPerNode = 1; - - if (!reuseGraph) { - SubFactoryMonitor m2(*this, "LocalMatrix+FillComplete", currentLevel); - - local_matrix_type localFA = local_matrix_type("A", numRows, A->getLocalMatrixDevice().numCols(), nnzFA, vals, rows, cols); - auto filteredACrs = CrsMatrixFactory::Build(localFA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap(), - A->getCrsGraph()->getImporter(), A->getCrsGraph()->getExporter()); - filteredA = rcp(new CrsMatrixWrap(filteredACrs)); + if (lumping) { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor); + } + } else { + if (reuseGraph) { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor); + } else { + auto fillFunctor = MatrixConstruction::VectorFillFunctor(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph); + Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor); + } } - filteredA->SetFixedBlockSize(A->GetFixedBlockSize()); + filteredA = Xpetra::MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()); + filteredA->SetFixedBlockSize(blkSize); - if (pL.get("filtered matrix: reuse eigenvalue")) { + if (reuseEigenvalue) { // Reuse max eigenvalue from A // It is unclear what eigenvalue is the best for the smoothing, but we already may have // the D^{-1}A estimate in A, may as well use it. @@ -759,144 +1070,18 @@ void CoalesceDropFactory_kokkos:: filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits::one()); } - } else if (blkSize > 1 && threshold == zero) { - // Case 3: block problem without filtering - // - // FIXME_KOKKOS: this code is completely unoptimized. It really should do - // a very simple thing: merge rows and produce nodal graph. But the code - // seems very complicated. Can we do better? - - TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiply of " << blkSize); - - const RCP rowMap = A->getRowMap(); - const RCP colMap = A->getColMap(); - - // build a node row map (uniqueMap = non-overlapping) and a node column map - // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation - // stored in the AmalgamationInfo class container contain the local node id - // given a local dof id. The data is calculated in the AmalgamationFactory and - // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo) - const RCP uniqueMap = amalInfo->getNodeRowMap(); - const RCP nonUniqueMap = amalInfo->getNodeColMap(); - Array rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View? - Array colTranslationArray = *(amalInfo->getColTranslation()); - - Kokkos::View - rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size()); - Kokkos::View - colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size()); - - // get number of local nodes - LO numNodes = Teuchos::as(uniqueMap->getLocalNumElements()); - typedef typename Kokkos::View id_translation_type; - id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size()); - id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size()); - Kokkos::deep_copy(rowTranslation, rowTranslationView); - Kokkos::deep_copy(colTranslation, colTranslationView); - - // extract striding information - blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map) - LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map - LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size) - if (A->IsView("stridedMaps") == true) { - const RCP myMap = A->getRowMap("stridedMaps"); - const RCP strMap = Teuchos::rcp_dynamic_cast(myMap); - TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap"); - blkSize = Teuchos::as(strMap->getFixedBlockSize()); - blkId = strMap->getStridedBlockId(); - if (blkId > -1) - blkPartSize = Teuchos::as(strMap->getStridingData()[blkId]); - } - auto kokkosMatrix = A->getLocalMatrixDevice(); // access underlying kokkos data - - // - typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; - typedef typename kokkos_graph_type::row_map_type row_map_type; - // typedef typename row_map_type::HostMirror row_map_type_h; - typedef typename kokkos_graph_type::entries_type entries_type; - - // Stage 1c: get number of dof-nonzeros per blkSize node rows - typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1); - LO numDofCols = 0; - CoalesceDrop_Kokkos_Details::Stage1aVectorFunctor stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0, numNodes), stage1aFunctor, numDofCols); - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanFunctor(dofNnz); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanFunctor); - - // Detect and record dof rows that correspond to Dirichlet boundary conditions - boundary_nodes_type singleEntryRows = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); - - typename entries_type::non_const_type dofcols("dofcols", numDofCols /*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? should be a parallel reduce, i guess - - // we have dofcols and dofids from Stage1dVectorFunctor - LO numNodeCols = 0; - typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1); - typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes); - - CoalesceDrop_Kokkos_Details::Stage1bcVectorFunctor stage1bcFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols, colTranslation, rows, singleEntryRows, bndNodes, pL.get("aggregation: greedy Dirichlet")); - Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1bcFunctor, numNodeCols); - - // parallel_scan (exclusive) - CoalesceDrop_Kokkos_Details::ScanFunctor scanNodeFunctor(rows); - Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0, numNodes + 1), scanNodeFunctor); - - // create column node view - typename entries_type::non_const_type cols("nodecols", numNodeCols); - - CoalesceDrop_Kokkos_Details::Stage1dVectorFunctor stage1dFunctor(dofcols, dofNnz, cols, rows); - Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0, numNodes), stage1dFunctor); - kokkos_graph_type kokkosGraph(cols, rows); - - // create LW graph - graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); - - boundaryNodes = bndNodes; + graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A")); graph->SetBoundaryNodeMap(boundaryNodes); - numTotal = A->getLocalNumEntries(); - - dofsPerNode = blkSize; - - filteredA = A; - - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu: CoalesceDropFactory_kokkos: Block filtering is not implemented"); } - if (GetVerbLevel() & Statistics1) { - GO numLocalBoundaryNodes = 0; - GO numGlobalBoundaryNodes = 0; - - Kokkos::parallel_reduce( - "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)), - KOKKOS_LAMBDA(const LO i, GO& n) { - if (boundaryNodes(i)) - n++; - }, - numLocalBoundaryNodes); - - auto comm = A->getRowMap()->getComm(); - MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes); - GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl; - } - - if ((GetVerbLevel() & Statistics1) && threshold != zero) { - auto comm = A->getRowMap()->getComm(); - - GO numGlobalTotal, numGlobalDropped; - MueLu_sumAll(comm, numTotal, numGlobalTotal); - MueLu_sumAll(comm, numDropped, numGlobalDropped); - - if (numGlobalTotal != 0) { - GetOStream(Statistics1) << "Number of dropped entries: " - << numGlobalDropped << "/" << numGlobalTotal - << " (" << 100 * Teuchos::as(numGlobalDropped) / Teuchos::as(numGlobalTotal) << "%)" << std::endl; - } - } + LO dofsPerNode = blkSize; Set(currentLevel, "DofsPerNode", dofsPerNode); Set(currentLevel, "Graph", graph); Set(currentLevel, "A", filteredA); + + return std::make_tuple(numDropped, boundaryNodes); } + } // namespace MueLu #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp new file mode 100644 index 000000000000..1bb2fa1b1648 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp @@ -0,0 +1,532 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_CUTDROP_HPP +#define MUELU_CUTDROP_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "MueLu_DroppingCommon.hpp" +#include "MueLu_Utilities.hpp" +#include "Xpetra_Matrix.hpp" +#include "Xpetra_MultiVector.hpp" +#include "MueLu_DistanceLaplacianDropping.hpp" + +namespace MueLu::CutDrop { + +/*! Cut drop algorithm options*/ +enum decisionAlgoType { defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric }; + +/*! + @class UnscaledComparison + @brief Orders entries of row \f$i\f$ by \f$|A_{ij}|^2\f$. +*/ +template +class UnscaledComparison { + public: + using matrix_type = Xpetra::Matrix; + + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + public: + UnscaledComparison(matrix_type& A_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) {} + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const local_ordinal_type offset; + const results_view results; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + return ATS::magnitude(A.values(offset + x) * A.values(offset + x)); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, rlid, results); + } +}; + +/*! + @class ScaledComparison + @brief Orders entries of row \f$i\f$ by \f$\frac{|A_{ij}|^2}{|A_{ii}| |A_{jj}|}\f$. +*/ +template +class ScaledComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + + public: + ScaledComparison(matrix_type& A_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) { + diagVec = Utilities::GetMatrixOverlappedDiagonal(A_); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto x_aij = ATS::magnitude(A.values(offset + x) * A.values(offset + x)); + auto x_aiiajj = ATS::magnitude(diag(rlid) * diag(A.graph.entries(offset + x))); + return (x_aij / x_aiiajj); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, rlid, results); + } +}; + +template +class UnscaledDistanceLaplacianComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + DistanceFunctorType dist2; + + public: + UnscaledDistanceLaplacianComparison(matrix_type& A_, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) + , dist2(dist2_) { + // Construct ghosted distance Laplacian diagonal + diagVec = DistanceLaplacian::getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const DistanceFunctorType2* dist2; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + const scalar_type one = ATS::one(); + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const DistanceFunctorType2* dist2_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , dist2(dist2_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto clid = A.graph.entries(offset + x); + scalar_type val; + if (rlid != clid) { + val = one / dist2->distance2(rlid, clid); + } else { + val = diag(rlid); + } + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + return aij2; + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, &dist2, rlid, results); + } +}; + +/*! + @class ScaledDistanceLaplacianComparison + @brief Orders entries of row \f$i\f$ by \f$\frac{|d_{ij}|^2}{|d_{ii}| |d_{jj}|}\f$ where \f$d_ij\f$ is the distance Laplacian. +*/ +template +class ScaledDistanceLaplacianComparison { + public: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + private: + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + Teuchos::RCP diagVec; + diag_view_type diag; + DistanceFunctorType dist2; + + public: + ScaledDistanceLaplacianComparison(matrix_type& A_, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , results(results_) + , dist2(dist2_) { + // Construct ghosted distance Laplacian diagonal + diagVec = DistanceLaplacian::getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + template + struct Comparator { + private: + using scalar_type = typename local_matrix_type2::value_type; + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + + const local_matrix_type2 A; + const diag_view_type2 diag; + const DistanceFunctorType2* dist2; + const local_ordinal_type rlid; + const local_ordinal_type offset; + const results_view results; + + const scalar_type one = ATS::one(); + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, const diag_view_type2& diag_, const DistanceFunctorType2* dist2_, local_ordinal_type rlid_, const results_view& results_) + : A(A_) + , diag(diag_) + , dist2(dist2_) + , rlid(rlid_) + , offset(A_.graph.row_map(rlid_)) + , results(results_) {} + + KOKKOS_INLINE_FUNCTION + magnitudeType get_value(size_t x) const { + auto clid = A.graph.entries(offset + x); + scalar_type val; + if (rlid != clid) { + val = one / dist2->distance2(rlid, clid); + } else { + val = diag(rlid); + } + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + return (aij2 / aiiajj); + } + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + if (results(offset + x) != UNDECIDED) { + if (results(offset + y) != UNDECIDED) { + // does not matter + return (x < y); + } else { + // sort undecided to the right + return true; + } + } else { + if (results(offset + y) != UNDECIDED) { + // sort undecided to the right + return false; + } else { + return get_value(x) > get_value(y); + } + } + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type rlid) const { + return comparator_type(A, diag, &dist2, rlid, results); + } +}; + +template +KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type comparator) { + auto N = v.extent(0); + size_t start = N / 2; + size_t end = N; + while (end > 1) { + if (start > 0) + start = start - 1; + else { + end = end - 1; + auto temp = v(0); + v(0) = v(end); + v(end) = temp; + } + size_t root = start; + while (2 * root + 1 < end) { + size_t child = 2 * root + 1; + if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) + ++child; + + if (comparator(v(root), v(child))) { + auto temp = v(root); + v(root) = v(child); + v(child) = temp; + root = child; + } else + break; + } + } +} + +/*! + @class CutDropFunctor + @brief Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is below the threshold. +*/ +template +class CutDropFunctor { + private: + using local_matrix_type = typename comparison_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + comparison_type comparison; + magnitudeType eps; + results_view results; + Kokkos::View index; + + public: + CutDropFunctor(comparison_type& comparison_, magnitudeType threshold) + : A(comparison_.A) + , comparison(comparison_) + , eps(threshold) + , results(comparison_.results) { + index = Kokkos::View("indices", A.nnz()); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(const local_ordinal_type& rlid) const { + auto row = A.rowConst(rlid); + size_t nnz = row.length; + + auto drop_view = Kokkos::subview(results, Kokkos::make_pair(A.graph.row_map(rlid), A.graph.row_map(rlid + 1))); + auto row_permutation = Kokkos::subview(index, Kokkos::make_pair(A.graph.row_map(rlid), A.graph.row_map(rlid + 1))); + + auto comparator = comparison.getComparator(rlid); + + for (size_t i = 0; i < nnz; ++i) { + row_permutation(i) = i; + } + serialHeapSort(row_permutation, comparator); + + size_t keepStart = 0; + size_t dropStart = nnz; + // find index where dropping starts + for (size_t i = 1; i < nnz; ++i) { + auto const& x = row_permutation(i - 1); + auto const& y = row_permutation(i); + if ((drop_view(x) != UNDECIDED) && (drop_view(y) == UNDECIDED)) + keepStart = i; + if ((drop_view(x) != UNDECIDED) || (drop_view(y) != UNDECIDED)) + continue; + magnitudeType x_aij = comparator.get_value(x); + magnitudeType y_aij = comparator.get_value(y); + if (eps * eps * x_aij > y_aij) { + if (i < dropStart) { + dropStart = i; + } + } + } + + // drop everything to the right of where values stop passing threshold + for (size_t i = keepStart; i < nnz; ++i) { + drop_view(row_permutation(i)) = Kokkos::max(dropStart <= i ? DROP : KEEP, drop_view(row_permutation(i))); + } + } +}; + +} // namespace MueLu::CutDrop + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp new file mode 100644 index 000000000000..12161d0d11d9 --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DistanceLaplacianDropping.hpp @@ -0,0 +1,195 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_DISTANCELAPLACIANDROPPING_HPP +#define MUELU_DISTANCELAPLACIANDROPPING_HPP + +#include "MueLu_DroppingCommon.hpp" +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Teuchos_RCP.hpp" +#include "Xpetra_Matrix.hpp" +#include "Xpetra_MultiVector.hpp" +#include "Xpetra_MultiVectorFactory.hpp" + +namespace MueLu::DistanceLaplacian { + +/*! +@class DistanceFunctor +@brief Computes the unscaled distance Laplacian. +*/ +template +class DistanceFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = LocalOrdinal; + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using magATS = Kokkos::ArithTraits; + using coords_type = Xpetra::MultiVector; + using local_coords_type = typename coords_type::dual_view_type_const::t_dev; + + Teuchos::RCP coordsMV; + Teuchos::RCP ghostedCoordsMV; + + local_coords_type coords; + local_coords_type ghostedCoords; + + public: + DistanceFunctor(matrix_type& A, Teuchos::RCP& coords_) { + coordsMV = coords_; + auto importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + ghostedCoordsMV = Xpetra::MultiVectorFactory::Build(importer->getTargetMap(), coordsMV->getNumVectors()); + ghostedCoordsMV->doImport(*coordsMV, *importer, Xpetra::INSERT); + coords = coordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + ghostedCoords = ghostedCoordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + } else { + coords = coordsMV->getDeviceLocalView(Xpetra::Access::ReadOnly); + ghostedCoords = coords; + } + } + + KOKKOS_FORCEINLINE_FUNCTION + magnitudeType distance2(const local_ordinal_type row, const local_ordinal_type col) const { + magnitudeType d = magATS::zero(); + magnitudeType s; + for (size_t j = 0; j < coords.extent(1); ++j) { + s = coords(row, j) - ghostedCoords(col, j); + d += s * s; + } + return d; + } +}; + +/*! +Method to compute ghosted distance Laplacian diagonal. +*/ +template +Teuchos::RCP > +getDiagonal(Xpetra::Matrix& A, + DistanceFunctorType& distFunctor) { + using scalar_type = Scalar; + using local_ordinal_type = LocalOrdinal; + using global_ordinal_type = GlobalOrdinal; + using node_type = Node; + using ATS = Kokkos::ArithTraits; + using impl_scalar_type = typename ATS::val_type; + using implATS = Kokkos::ArithTraits; + using magnitudeType = typename implATS::magnitudeType; + using execution_space = typename Node::execution_space; + using range_type = Kokkos::RangePolicy; + + auto diag = Xpetra::MultiVectorFactory::Build(A.getRowMap(), 1); + { + auto lclA = A.getLocalMatrixDevice(); + auto lclDiag = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + + Kokkos::parallel_for( + "MueLu:CoalesceDropF:Build:scalar_filter:laplacian_diag", + range_type(0, lclA.numRows()), + KOKKOS_LAMBDA(const local_ordinal_type& row) { + auto rowView = lclA.rowConst(row); + auto length = rowView.length; + + magnitudeType d; + impl_scalar_type d2 = implATS::zero(); + for (local_ordinal_type colID = 0; colID < length; colID++) { + auto col = rowView.colidx(colID); + if (row != col) { + d = distFunctor.distance2(row, col); + d2 += implATS::one() / d; + } + } + lclDiag(row, 0) = d2; + }); + } + auto importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + auto ghostedDiag = Xpetra::MultiVectorFactory::Build(A.getColMap(), 1); + ghostedDiag->doImport(*diag, *importer, Xpetra::INSERT); + return ghostedDiag; + } else { + return diag; + } +} + +/*! +@class DropFunctor +@brief Drops entries the unscaled distance Laplacian. + +Evaluates the dropping criterion +\f[ +\frac{|d_{ij}|^2}{|d_{ii}| |d_{jj}|} \le \theta^2 +\f] +where \f$d_{ij}\f$ is a distance metric. +*/ +template +class DropFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using diag_vec_type = Xpetra::MultiVector; + using diag_view_type = typename Kokkos::DualView::t_dev; + + using results_view = Kokkos::View; + + using ATS = Kokkos::ArithTraits; + using magnitudeType = typename ATS::magnitudeType; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + magnitudeType eps; + Teuchos::RCP diagVec; + diag_view_type diag; // corresponds to overlapped diagonal + DistanceFunctorType dist2; + results_view results; + const scalar_type one = ATS::one(); + + public: + DropFunctor(matrix_type& A_, magnitudeType threshold, DistanceFunctorType& dist2_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , eps(threshold) + , dist2(dist2_) + , results(results_) { + diagVec = getDiagonal(A_, dist2); + auto lclDiag2d = diagVec->getDeviceLocalView(Xpetra::Access::ReadOnly); + diag = Kokkos::subview(lclDiag2d, Kokkos::ALL(), 0); + } + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + + scalar_type val; + if (rlid != clid) { + val = one / dist2.distance2(rlid, clid); + } else { + val = diag(rlid); + } + auto aiiajj = ATS::magnitude(diag(rlid)) * ATS::magnitude(diag(clid)); // |a_ii|*|a_jj| + auto aij2 = ATS::magnitude(val) * ATS::magnitude(val); // |a_ij|^2 + + results(offset + k) = Kokkos::max((aij2 <= eps * eps * aiiajj) ? DROP : KEEP, + results(offset + k)); + } + } +}; + +} // namespace MueLu::DistanceLaplacian + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp new file mode 100644 index 000000000000..dd371c124fcd --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -0,0 +1,409 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_DROPPINGCOMMON_HPP +#define MUELU_DROPPINGCOMMON_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" +#include "Xpetra_Access.hpp" +#include "Xpetra_Matrix.hpp" + +namespace MueLu { + +/*! Possible decision for a single entry. + Once we are done with dropping, we should have no UNDECIDED entries left. + Normally, both DROP and BOUNDARY entries will be dropped, but we distinguish them in case we want to keep boundaries. + */ +enum DecisionType { + UNDECIDED = 0, // no decision has been taken yet, used for initialization + KEEP = 1, // keeep the entry + DROP = 2, // drop it + BOUNDARY = 3 // entry is a boundary +}; + +namespace Misc { + +template +class NoOpFunctor { + public: + NoOpFunctor() {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + } +}; + +/*! + @class PointwiseDropBoundaryFunctor + @brief Functor that drops boundary nodes for a blockSize == 1 problem. +*/ +template +class PointwiseDropBoundaryFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + PointwiseDropBoundaryFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + const bool isBoundaryRow = boundaryNodes(rlid); + if (isBoundaryRow) { + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + results(offset + k) = Kokkos::max(rlid == clid ? KEEP : DROP, + results(offset + k)); + } + } + } +}; + +/*! + @class VectorDropBoundaryFunctor + @brief Functor that drops boundary nodes for a blockSize > 1 problem. +*/ +template +class VectorDropBoundaryFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using boundary_nodes_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + block_indices_view_type point_to_block; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + VectorDropBoundaryFunctor(local_matrix_type& A_, block_indices_view_type point_to_block_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , point_to_block(point_to_block_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + const bool isBoundaryRow = boundaryNodes(point_to_block(rlid)); + if (isBoundaryRow) { + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + results(offset + k) = Kokkos::max(rlid == clid ? KEEP : DROP, + results(offset + k)); + } + } + } +}; + +/*! +@class KeepDiagonalFunctor +@brief Functor that marks diagonal as kept, unless the are already marked as boundary. +*/ +template +class KeepDiagonalFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + KeepDiagonalFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((rlid == clid) && (results(offset + k) != BOUNDARY)) { + results(offset + k) = KEEP; + break; + } + } + } +}; + +/*! +@class DropOffRankFunctor +@brief Functor that drops off-rank entries +*/ +template +class DropOffRankFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + DropOffRankFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (clid >= A.numRows()) { + results(offset + k) = Kokkos::max(DROP, results(offset + k)); + } + } + } +}; + +/*! +@class MarkSingletonFunctor +@brief Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary. +*/ +template +class MarkSingletonFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + MarkSingletonFunctor(local_matrix_type& A_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((results(offset + k) == KEEP) && (rlid != clid)) + return; + } + boundaryNodes(rlid) = true; + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (rlid == clid) + results(offset + k) = KEEP; + else + results(offset + k) = BOUNDARY; + } + } +}; + +/*! +@class MarkSingletonVectorFunctor +@brief Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary. +*/ +template +class MarkSingletonVectorFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + block_indices_view_type point_to_block; + boundary_nodes_view boundaryNodes; + results_view results; + + public: + MarkSingletonVectorFunctor(local_matrix_type& A_, block_indices_view_type point_to_block_, boundary_nodes_view boundaryNodes_, results_view& results_) + : A(A_) + , point_to_block(point_to_block_) + , boundaryNodes(boundaryNodes_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if ((results(offset + k) == KEEP) && (rlid != clid)) + return; + } + auto brlid = point_to_block(rlid); + boundaryNodes(brlid) = true; + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (rlid == clid) + results(offset + k) = KEEP; + else + results(offset + k) = BOUNDARY; + } + } +}; + +/*! +@class BlockDiagonalizeFunctor +@brief Functor that drops all entries that are not on the block diagonal. +*/ +template +class BlockDiagonalizeFunctor { + private: + using matrix_type = Xpetra::Matrix; + using local_matrix_type = typename matrix_type::local_matrix_type; + + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using block_indices_type = Xpetra::MultiVector; + using local_block_indices_view_type = typename block_indices_type::dual_view_type_const::t_dev; + + local_matrix_type A; + local_block_indices_view_type point_to_block; + local_block_indices_view_type ghosted_point_to_block; + results_view results; + + public: + BlockDiagonalizeFunctor(matrix_type& A_, block_indices_type& point_to_block_, block_indices_type& ghosted_point_to_block_, results_view& results_) + : A(A_.getLocalMatrixDevice()) + , point_to_block(point_to_block_.getDeviceLocalView(Xpetra::Access::ReadOnly)) + , ghosted_point_to_block(ghosted_point_to_block_.getDeviceLocalView(Xpetra::Access::ReadOnly)) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + if (point_to_block(rlid, 0) == ghosted_point_to_block(clid, 0)) { + results(offset + k) = Kokkos::max(KEEP, results(offset + k)); + } else { + results(offset + k) = Kokkos::max(DROP, results(offset + k)); + } + } + } +}; + +/*! +@class DebugFunctor +@brief Functor that checks that all entries have been marked. +*/ +template +class DebugFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using boundary_nodes_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + DebugFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + if (results(offset + k) == UNDECIDED) { + Kokkos::printf("No dropping decision was taken for entry (%d, %d)\n", rlid, row.colidx(k)); + assert(false); + } + } + } +}; + +/*! +@class SymmetrizeFunctor +@brief Functor that symmetrizes the dropping decisions. +*/ +template +class SymmetrizeFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + local_matrix_type A; + results_view results; + + public: + SymmetrizeFunctor(local_matrix_type& A_, results_view& results_) + : A(A_) + , results(results_) {} + + KOKKOS_FORCEINLINE_FUNCTION + void operator()(local_ordinal_type rlid) const { + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + for (local_ordinal_type k = 0; k < row.length; ++k) { + if (results(offset + k) == KEEP) { + auto clid = row.colidx(k); + if (clid >= A.numRows()) + continue; + auto row2 = A.rowConst(clid); + const size_t offset2 = A.graph.row_map(clid); + for (local_ordinal_type k2 = 0; k2 < row2.length; ++k2) { + auto clid2 = row2.colidx(k2); + if (clid2 == rlid) { + if (results(offset2 + k2) == DROP) + results(offset2 + k2) = KEEP; + break; + } + } + } + } + } +}; + +} // namespace Misc + +} // namespace MueLu + +#endif diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp new file mode 100644 index 000000000000..1a5f2729c72e --- /dev/null +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -0,0 +1,819 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_MATRIXCONSTRUCTION_HPP +#define MUELU_MATRIXCONSTRUCTION_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_ArithTraits.hpp" + +#include "MueLu_DroppingCommon.hpp" + +#ifdef MUELU_COALESCE_DROP_DEBUG +// For demangling function names +#include +#endif + +namespace MueLu::MatrixConstruction { +/*! + @class PointwiseCountingFunctor + @brief Functor that executes a sequence of sub-functors on each row for a problem with blockSize == 1. + + The functor applies a series of functors to each row of the matrix. + Each sub-functor can modify the decision to drop or keep any matrix entry in the given row. + These decisions are applied to the results_view. + Once a row has been processed by all sub-functors, the number of entries in the row after dropping is determined. + The result is saved as offsets in rowptr. +*/ +template +class PointwiseCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + + local_matrix_type A; + results_view results; + rowptr_type rowptr; + functor_type functor; + PointwiseCountingFunctor remainingFunctors; + bool firstFunctor; + +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string functorName; +#endif + + public: + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_, remaining_functor_types&... remainingFunctors_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor(functor_) + , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...) + , firstFunctor(true) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& functor_, remaining_functor_types&... remainingFunctors_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor(functor_) + , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...) + , firstFunctor(firstFunctor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const { +#ifdef MUELU_COALESCE_DROP_DEBUG + if (firstFunctor) { + Kokkos::printf("\nStarting on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + + functor(rlid); + +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("%s\n", functorName.c_str()); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif + + remainingFunctors(rlid, nnz, final); + } +}; + +template +class PointwiseCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + + local_matrix_type A; + results_view results; + rowptr_type rowptr; + functor_type functor; + bool firstFunctor; + +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string functorName; +#endif + + public: + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor(functor_) + , firstFunctor(true) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& functor_) + : A(A_) + , results(results_) + , rowptr(rowptr_) + , functor(functor_) + , firstFunctor(firstFunctor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(functorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const { +#ifdef MUELU_COALESCE_DROP_DEBUG + if (firstFunctor) { + Kokkos::printf("\nStarting on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + + functor(rlid); + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("%s\n", functorName); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + + Kokkos::printf("\n"); + Kokkos::printf("Done with row %d\n", rlid); +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++nnz; + } + } + if (final) + rowptr(rlid + 1) = nnz; + } +}; + +/*! + @class PointwiseFillReuseFunctor + @brief Functor that fills the filtered matrix while reusing the graph of the matrix before dropping, blockSize == 1. + + The dropped graph is built from scratch. + The filtered matrix reuses the graph of the matrix before dropping. + Lumps dropped entries to the diagonal if lumping==true. +*/ +template +class PointwiseFillReuseFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + results_view results; + local_matrix_type filteredA; + local_graph_type graph; + const scalar_type zero = ATS::zero(); + + public: + PointwiseFillReuseFunctor(local_matrix_type& A_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) + : A(A_) + , results(results_) + , filteredA(filteredA_) + , graph(graph_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto rowA = A.row(rlid); + size_t row_start = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + local_ordinal_type jj = 0; + local_ordinal_type graph_offset = graph.row_map(rlid); + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(row_start + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + graph.entries(graph_offset + jj) = rowA.colidx(k); + ++jj; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } else { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } +}; + +/*! + @class PointwiseFillNoReuseFunctor + @brief Functor does not reuse the graph of the matrix for a problem with blockSize == 1. + + The dropped graph and the filtered matrix are built from scratch. + Lumps dropped entries to the diagonal if lumping==true. +*/ +template +class PointwiseFillNoReuseFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + results_view results; + local_matrix_type filteredA; + const scalar_type zero = ATS::zero(); + + public: + PointwiseFillNoReuseFunctor(local_matrix_type& A_, results_view& results_, local_matrix_type& filteredA_) + : A(A_) + , results(results_) + , filteredA(filteredA_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type rlid) const { + auto rowA = A.row(rlid); + size_t K = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(K + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } +}; + +/*! + @class VectorCountingFunctor + @brief Functor that executes a sequence of sub-functors on each block of rows. + + The functor applies a series of functors to each row of the matrix. + Each sub-functor can modify the decision to drop or keep any matrix entry in the given row. + These decisions are applied to the results_view. + Once a row has been processed by all sub-functors, the number of entries in the row after dropping is determined. + The result is saved as offsets in rowptr. +*/ +template +class VectorCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + rowptr_type filtered_rowptr; + rowptr_type graph_rowptr; + + functor_type functor; + VectorCountingFunctor remainingFunctors; + + std::vector functorNames; + + public: + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_, remaining_functor_types&... remainingFunctors_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor(functor_) + , remainingFunctors(A_, blockSize_, ghosted_point_to_block_, results_, filtered_rowptr_, graph_rowptr_, remainingFunctors_...) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + KOKKOS_INLINE_FUNCTION + void join(Kokkos::pair& dest, const Kokkos::pair& src) const { + dest.first += src.first; + dest.second += src.second; + } + + KOKKOS_INLINE_FUNCTION + void operatorRow(const local_ordinal_type rlid) const { + functor(rlid); + remainingFunctors.operatorRow(rlid); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid, Kokkos::pair& nnz, const bool& final) const { + auto nnz_filtered = &nnz.first; + auto nnz_graph = &nnz.second; + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\nStarting on block row %d\n", brlid); +#endif + for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("\nStarting on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + + functor(rlid); + remainingFunctors.operatorRow(rlid); + +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("%s\n", functorName.c_str()); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with row %d\n", rlid); +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++(*nnz_filtered); + } + } + if (final) + filtered_rowptr(rlid + 1) = *nnz_filtered; + } + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with block row %d\nGraph indices ", brlid); +#endif + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = ATS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } + } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + ++(*nnz_graph); +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("%5d ", bclid); +#endif + prev_bclid = bclid; + } + } + } +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\n"); +#endif + if (final) + graph_rowptr(brlid + 1) = *nnz_graph; + } +}; + +template +class VectorCountingFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using block_indices_view_type = Kokkos::View; + + using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; + using ATS = Kokkos::ArithTraits; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + rowptr_type filtered_rowptr; + rowptr_type graph_rowptr; + + bool firstFunctor; + functor_type functor; + + std::vector functorNames; + + public: + VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filtered_rowptr(filtered_rowptr_) + , graph_rowptr(graph_rowptr_) + , functor(functor_) { +#ifdef MUELU_COALESCE_DROP_DEBUG + std::string mangledFunctorName = typeid(decltype(functor)).name(); + int status = 0; + char* demangledFunctorName = 0; + demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status); + functorName = demangledFunctorName; +#endif + } + + KOKKOS_INLINE_FUNCTION + void join(Kokkos::pair& dest, const Kokkos::pair& src) const { + dest.first += src.first; + dest.second += src.second; + } + + KOKKOS_INLINE_FUNCTION + void operatorRow(const local_ordinal_type rlid) const { + functor(rlid); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid, Kokkos::pair& nnz, const bool& final) const { + auto nnz_filtered = &nnz.first; + auto nnz_graph = &nnz.second; + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\nStarting on block row %d\n", brlid); +#endif + for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("\nStarting on row %d\n", rlid); + + auto row = A.rowConst(rlid); + + Kokkos::printf("indices: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto clid = row.colidx(k); + Kokkos::printf("%5d ", clid); + } + Kokkos::printf("\n"); + + Kokkos::printf("values: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + auto val = row.value(k); + Kokkos::printf("%5f ", val); + } + Kokkos::printf("\n"); + } +#endif + + functor(rlid); + +#ifdef MUELU_COALESCE_DROP_DEBUG + { + Kokkos::printf("%s\n", functorName.c_str()); + + auto row = A.rowConst(rlid); + const size_t offset = A.graph.row_map(rlid); + + Kokkos::printf("decisions: "); + for (local_ordinal_type k = 0; k < row.length; ++k) { + Kokkos::printf("%5d ", results(offset + k)); + } + Kokkos::printf("\n"); + } +#endif + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with row %d\n", rlid); +#endif + + size_t start = A.graph.row_map(rlid); + size_t end = A.graph.row_map(rlid + 1); + for (size_t i = start; i < end; ++i) { + if (results(i) == KEEP) { + ++(*nnz_filtered); + } + } + if (final) + filtered_rowptr(rlid + 1) = *nnz_filtered; + } + +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("Done with block row %d\nGraph indices ", brlid); +#endif + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = ATS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } + } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + ++(*nnz_graph); +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("%5d ", bclid); +#endif + prev_bclid = bclid; + } + } + } +#ifdef MUELU_COALESCE_DROP_DEBUG + Kokkos::printf("\n"); +#endif + if (final) + graph_rowptr(brlid + 1) = *nnz_graph; + } +}; + +/*! + @class VectorFillNoReuseFunctor + @brief Functor does not reuse the graph of the matrix for a problem with blockSize>1. + + The dropped graph and the filtered matrix are built from scratch. + Lumps dropped entries to the diagonal if lumping==true. +*/ +template +class VectorFillFunctor { + private: + using scalar_type = typename local_matrix_type::value_type; + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using local_graph_type = typename local_matrix_type::staticcrsgraph_type; + using memory_space = typename local_matrix_type::memory_space; + using results_view = Kokkos::View; + using ATS = Kokkos::ArithTraits; + using OTS = Kokkos::ArithTraits; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + local_ordinal_type blockSize; + block_indices_view_type ghosted_point_to_block; + results_view results; + local_matrix_type filteredA; + local_graph_type graph; + const scalar_type zero = ATS::zero(); + + public: + VectorFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) + : A(A_) + , blockSize(blockSize_) + , ghosted_point_to_block(ghosted_point_to_block_) + , results(results_) + , filteredA(filteredA_) + , graph(graph_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const local_ordinal_type brlid) const { + for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) { + auto rowA = A.row(rlid); + size_t row_start = A.graph.row_map(rlid); + auto rowFilteredA = filteredA.row(rlid); + local_ordinal_type j = 0; + scalar_type diagCorrection = zero; + local_ordinal_type diagOffset = -1; + for (local_ordinal_type k = 0; k < rowA.length; ++k) { + if constexpr (lumping) { + local_ordinal_type clid = rowA.colidx(k); + if (rlid == clid) { + diagOffset = j; + } + } + if (results(row_start + k) == KEEP) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = rowA.value(k); + ++j; + } else if constexpr (lumping) { + diagCorrection += rowA.value(k); + if constexpr (reuse) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } else if constexpr (reuse) { + rowFilteredA.colidx(j) = rowA.colidx(k); + rowFilteredA.value(j) = zero; + ++j; + } + } + if constexpr (lumping) { + rowFilteredA.value(diagOffset) += diagCorrection; + } + } + + local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + nextIndices[block_index] = 0; + } + local_ordinal_type prev_bclid = -1; + + local_ordinal_type j = graph.row_map(brlid); + while (true) { + local_ordinal_type min_block_index = -1; + local_ordinal_type min_clid = OTS::max(); + local_ordinal_type min_offset = -1; + for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { + auto rlid = blockSize * brlid + block_index; + auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; + if (offset == A.graph.row_map(rlid + 1)) + continue; + auto clid = A.graph.entries(offset); + if (clid < min_clid) { + min_block_index = block_index; + min_clid = clid; + min_offset = offset; + } + } + if (min_block_index == -1) + break; + ++nextIndices[min_block_index]; + auto bclid = ghosted_point_to_block(min_clid); + if (prev_bclid < bclid) { + if (results(min_offset) == KEEP) { + graph.entries(j) = bclid; + ++j; + prev_bclid = bclid; + } + } + } + } +}; + +} // namespace MueLu::MatrixConstruction + +#endif diff --git a/packages/muelu/src/Headers/LO-GO-NO.tmpl b/packages/muelu/src/Headers/LO-GO-NO.tmpl index a02cc88af6fe..279e44678a32 100644 --- a/packages/muelu/src/Headers/LO-GO-NO.tmpl +++ b/packages/muelu/src/Headers/LO-GO-NO.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index 6418213b359c..a5bf3f68b544 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -209,9 +209,6 @@ using MergedBlockedMatrixFactory [[maybe_unused]] = MueLu::MergedBlockedMatrixFa #ifdef MUELU_MERGEDSMOOTHER_SHORT using MergedSmoother [[maybe_unused]] = MueLu::MergedSmoother; #endif -#ifdef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -using MLParameterListInterpreter [[maybe_unused]] = MueLu::MLParameterListInterpreter; -#endif #ifdef MUELU_MULTIVECTORTRANSFERFACTORY_SHORT using MultiVectorTransferFactory [[maybe_unused]] = MueLu::MultiVectorTransferFactory; #endif @@ -221,9 +218,6 @@ using NotayAggregationFactory [[maybe_unused]] = MueLu::NotayAggregationFactory< #ifdef MUELU_NULLSPACEFACTORY_SHORT using NullspaceFactory [[maybe_unused]] = MueLu::NullspaceFactory; #endif -#ifdef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -using NullspaceFactory_kokkos [[maybe_unused]] = MueLu::NullspaceFactory_kokkos; -#endif #ifdef MUELU_NULLSPACEPRESMOOTHFACTORY_SHORT using NullspacePresmoothFactory [[maybe_unused]] = MueLu::NullspacePresmoothFactory; #endif diff --git a/packages/muelu/src/Headers/Non-Templated.tmpl b/packages/muelu/src/Headers/Non-Templated.tmpl index 6b7294ffa2df..c0993b86a4d5 100644 --- a/packages/muelu/src/Headers/Non-Templated.tmpl +++ b/packages/muelu/src/Headers/Non-Templated.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl b/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl index 2004b62dfbfa..86161dee5add 100644 --- a/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl +++ b/packages/muelu/src/Headers/SC-LO-GO-NO.tmpl @@ -1,12 +1,3 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - #ifdef MUELU_$TMPL_UPPERCASECLASS_SHORT using $TMPL_CLASS [[maybe_unused]] = MueLu::$TMPL_CLASS; #endif diff --git a/packages/muelu/src/Headers/gen_UseShortNames.sh b/packages/muelu/src/Headers/gen_UseShortNames.sh index 49a723c4a424..7bbe12dc5e1e 100755 --- a/packages/muelu/src/Headers/gen_UseShortNames.sh +++ b/packages/muelu/src/Headers/gen_UseShortNames.sh @@ -6,7 +6,16 @@ classListDir=../Utils/ClassList/ -echo "// Type definitions for templated classes (generally graph-related) that do not require a scalar." > MueLu_UseShortNamesOrdinal.hpp +echo "// @HEADER" > MueLu_UseShortNamesOrdinal.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesOrdinal.hpp +echo "// MueLu: A package for multigrid based preconditioning" >> MueLu_UseShortNamesOrdinal.hpp +echo "//" >> MueLu_UseShortNamesOrdinal.hpp +echo "// Copyright 2012 NTESS and the MueLu contributors." >> MueLu_UseShortNamesOrdinal.hpp +echo "// SPDX-License-Identifier: BSD-3-Clause" >> MueLu_UseShortNamesOrdinal.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesOrdinal.hpp +echo "// @HEADER" >> MueLu_UseShortNamesOrdinal.hpp +echo "" >> MueLu_UseShortNamesOrdinal.hpp +echo "// Type definitions for templated classes (generally graph-related) that do not require a scalar." >> MueLu_UseShortNamesOrdinal.hpp echo >> MueLu_UseShortNamesOrdinal.hpp echo "#include " >> MueLu_UseShortNamesOrdinal.hpp echo >> MueLu_UseShortNamesOrdinal.hpp @@ -27,8 +36,16 @@ done # # Scalar # - -echo "// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, Node of the current context." > MueLu_UseShortNamesScalar.hpp +echo "// @HEADER" > MueLu_UseShortNamesScalar.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesScalar.hpp +echo "// MueLu: A package for multigrid based preconditioning" >> MueLu_UseShortNamesScalar.hpp +echo "//" >> MueLu_UseShortNamesScalar.hpp +echo "// Copyright 2012 NTESS and the MueLu contributors." >> MueLu_UseShortNamesScalar.hpp +echo "// SPDX-License-Identifier: BSD-3-Clause" >> MueLu_UseShortNamesScalar.hpp +echo "// *****************************************************************************" >> MueLu_UseShortNamesScalar.hpp +echo "// @HEADER" >> MueLu_UseShortNamesScalar.hpp +echo "" >> MueLu_UseShortNamesScalar.hpp +echo "// New definition of types using the types Scalar, LocalOrdinal, GlobalOrdinal, Node of the current context." >> MueLu_UseShortNamesScalar.hpp echo >> MueLu_UseShortNamesScalar.hpp echo "#include " >> MueLu_UseShortNamesScalar.hpp echo >> MueLu_UseShortNamesScalar.hpp diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index a6c6add07da2..5176bc2fb677 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -126,7 +126,6 @@ #include "MueLu_CoalesceDropFactory_kokkos_fwd.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos_fwd.hpp" #ifdef HAVE_MUELU_DEPRECATED_CODE -#include "MueLu_NullspaceFactory_kokkos_fwd.hpp" #include "MueLu_SaPFactory_kokkos_fwd.hpp" #endif #include "MueLu_SemiCoarsenPFactory_kokkos_fwd.hpp" diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp index cc144b4366fe..0694928ceceb 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp @@ -105,7 +105,6 @@ #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_GeometricInterpolationPFactory_kokkos.hpp" #ifdef HAVE_MUELU_DEPRECATED_CODE -#include "MueLu_NullspaceFactory_kokkos.hpp" #include "MueLu_SaPFactory_kokkos.hpp" #endif #include "MueLu_SemiCoarsenPFactory_kokkos.hpp" @@ -216,7 +215,6 @@ RCP FactoryFactory if (factoryName == "CoalesceDropFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "GeometricInterpolationPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); #ifdef HAVE_MUELU_DEPRECATED_CODE - if (factoryName == "NullspaceFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "SaPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); #endif if (factoryName == "SemiCoarsenPFactory_kokkos") return Build2(paramList, factoryMapIn, factoryManagersIn); diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp deleted file mode 100644 index 8b22c8fc7f90..000000000000 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ /dev/null @@ -1,202 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP - -#include - -#include -#include -#include - -#include "MueLu_ConfigDefs.hpp" -#include "MueLu_HierarchyManager.hpp" -#include "MueLu_MLParameterListInterpreter_fwd.hpp" - -#include "MueLu_Hierarchy_fwd.hpp" -#include "MueLu_SmootherFactory_fwd.hpp" - -#include "MueLu_TentativePFactory_fwd.hpp" -#include "MueLu_SaPFactory_fwd.hpp" -#include "MueLu_PgPFactory_fwd.hpp" -#include "MueLu_AmalgamationFactory_fwd.hpp" -#include "MueLu_TransPFactory_fwd.hpp" -#include "MueLu_GenericRFactory_fwd.hpp" -#include "MueLu_SmootherPrototype_fwd.hpp" -#include "MueLu_TrilinosSmoother_fwd.hpp" -#include "MueLu_IfpackSmoother_fwd.hpp" -#include "MueLu_DirectSolver_fwd.hpp" -#include "MueLu_RAPFactory_fwd.hpp" -#include "MueLu_CoalesceDropFactory_fwd.hpp" -#include "MueLu_UncoupledAggregationFactory_fwd.hpp" -#include "MueLu_NullspaceFactory_fwd.hpp" -#include "MueLu_FactoryBase_fwd.hpp" - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) -#include "MueLu_RepartitionHeuristicFactory_fwd.hpp" -#include "MueLu_RepartitionFactory_fwd.hpp" -#include "MueLu_RebalanceTransferFactory_fwd.hpp" -#include "MueLu_IsorropiaInterface_fwd.hpp" -#include "MueLu_RebalanceAcFactory_fwd.hpp" -#include "MueLu_RebalanceMapFactory_fwd.hpp" -#endif - -#ifdef HAVE_MUELU_DEPRECATED_CODE -#ifdef MueLu_SHOW_DEPRECATED_WARNINGS -#warning "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" -#endif -#else -#error "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" -#endif - -namespace MueLu { - -/* - Utility that from an existing Teuchos::ParameterList creates a new list, in - which level-specific parameters are replaced with sublists. - - Currently, level-specific parameters that begin with "smoother:" - or "aggregation:" are placed in sublists. Coarse options are also placed - in a coarse list. - - Example: - Input: - smoother: type (level 0) = symmetric Gauss-Seidel - smoother: sweeps (level 0) = 1 - Output: - smoother: list (level 0) -> - smoother: type = symmetric Gauss-Seidel - smoother: sweeps = 1 -*/ -// This function is a copy of ML_CreateSublists to avoid dependency on ML -// Throw exception on error instead of exit() -void CreateSublists(const ParameterList& List, ParameterList& newList); - -/*! - @class MLParameterListInterpreter class. - @brief Class that accepts ML-style parameters and builds a MueLu preconditioner. - This interpreter uses the same default values as ML. This allows to compare ML/MueLu results - - The parameter list is validated only if the package ML is available and parameter "ML validate parameter list" is true. - TODO: A warning is issued if ML is not available -*/ - -template -class MLParameterListInterpreter : public HierarchyManager { -#undef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#include "MueLu_UseShortNames.hpp" - - public: - //! @name Constructors/Destructors. - //@{ - - //! Constructor. - MLParameterListInterpreter() - : nullspace_(NULL) - , blksize_(1) {} - - //! Constructor. - //! @param paramList: parameter list with ML parameters - //! @param[in] comm (RCP >): Optional RCP of a Teuchos communicator (default: Teuchos::null) - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm = Teuchos::null, std::vector > factoryList = std::vector >(0)); - - //! Constructor. - //! @param xmlFileName: file name for XML file with ML parameters - //! @param factoryList: vector with RCP of FactoryBase objects - //! - //! The factories in factoryList allow the user to add user-specific factories to the MueLu Hierarchy. - //! The idea is to be able to add some factories that write out some debug information etc. which are not handled by the ML - //! Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList = std::vector >(0)); - - //! Destructor. - virtual ~MLParameterListInterpreter() = default; - - //@} - - //@{ - - void SetParameterList(const Teuchos::ParameterList& paramList); - - //@} - - //@{ - - //! Setup Hierarchy object - virtual void SetupHierarchy(Hierarchy& H) const; - - //@} - - //@{ - - //! @name static helper functions translating parameter list to factories - //! @brief static helper functions that also can be used from outside for translating ML parameters into MueLu objects - //@{ - - //! Read smoother options and build the corresponding smoother factory - // @param AFact: Factory used by smoother to find 'A' - static RCP GetSmootherFactory(const Teuchos::ParameterList& paramList, const RCP& AFact = Teuchos::null); - - //@} - - //! @name Handling of additional user-specific transfer factories - //@{ - /*! @brief Add transfer factory in the end of list of transfer factories for RAPFactory. - - This allows the user to add user-specific factories to the MueLu Hierarchy. The idea is to be able - to add some factories that write out some debug information etc. which are not handled by the ML - Parameter List itself. See information about the RAPFactory::AddTransferFactory method, too! - */ - void AddTransferFactory(const RCP& factory); - - //! Returns number of transfer factories. - size_t NumTransferFactories() const; - //@} - - private: - //! nullspace can be embedded in the ML parameter list - int nullspaceDim_; - double* nullspace_; // TODO: replace by Teuchos::ArrayRCP<> - - //! coordinates can be embedded in the ML parameter list - double* xcoord_; - double* ycoord_; - double* zcoord_; - - //! list of user-defined transfer Factories - //! We use this vector to add some special user-given factories to the Hierarchy (RAPFactory) - //! This way the user can extend the standard functionality of the MLParameterListInterpreter beyond the - //! capabibilities of ML. - std::vector > TransferFacts_; - - //@{ Matrix configuration - - //! Setup Operator object - virtual void SetupOperator(Operator& Op) const; - - //! Matrix configuration storage - int blksize_; - - //@} - -}; // class MLParameterListInterpreter - -} // namespace MueLu - -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif /* MUELU_MLPARAMETERLISTINTERPRETER_DECL_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp deleted file mode 100644 index 09eb4bc28e60..000000000000 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_def.hpp +++ /dev/null @@ -1,728 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP - -#include - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_ML) -#include -#endif - -#include -#include -#include -#include -#include - -#include "MueLu_MLParameterListInterpreter_decl.hpp" - -#include "MueLu_Level.hpp" -#include "MueLu_Hierarchy.hpp" -#include "MueLu_FactoryManager.hpp" - -#include "MueLu_TentativePFactory.hpp" -#include "MueLu_SaPFactory.hpp" -#include "MueLu_PgPFactory.hpp" -#include "MueLu_AmalgamationFactory.hpp" -#include "MueLu_TransPFactory.hpp" -#include "MueLu_GenericRFactory.hpp" -#include "MueLu_SmootherPrototype.hpp" -#include "MueLu_SmootherFactory.hpp" -#include "MueLu_TrilinosSmoother.hpp" -#include "MueLu_IfpackSmoother.hpp" -#include "MueLu_DirectSolver.hpp" -#include "MueLu_HierarchyUtils.hpp" -#include "MueLu_RAPFactory.hpp" -#include "MueLu_CoalesceDropFactory.hpp" -#include "MueLu_UncoupledAggregationFactory.hpp" -#include "MueLu_NullspaceFactory.hpp" -#include "MueLu_ParameterListUtils.hpp" - -#include "MueLu_CoalesceDropFactory_kokkos.hpp" -// #include "MueLu_CoordinatesTransferFactory_kokkos.hpp" -#include "MueLu_TentativePFactory_kokkos.hpp" - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) -#include "MueLu_IsorropiaInterface.hpp" -#include "MueLu_RepartitionHeuristicFactory.hpp" -#include "MueLu_RepartitionFactory.hpp" -#include "MueLu_RebalanceTransferFactory.hpp" -#include "MueLu_RepartitionInterface.hpp" -#include "MueLu_RebalanceAcFactory.hpp" -//#include "MueLu_RebalanceMapFactory.hpp" -#endif - -// Note: do not add options that are only recognized by MueLu. - -// TODO: this parameter list interpreter should force MueLu to use default ML parameters -// - Ex: smoother sweep=2 by default for ML - -// Read a parameter value from a parameter list and store it into a variable named 'varName' -#define MUELU_READ_PARAM(paramList, paramStr, varType, defaultValue, varName) \ - varType varName = defaultValue; \ - if (paramList.isParameter(paramStr)) varName = paramList.get(paramStr); - -// Read a parameter value from a paraeter list and copy it into a new parameter list (with another parameter name) -#define MUELU_COPY_PARAM(paramList, paramStr, varType, defaultValue, outParamList, outParamStr) \ - if (paramList.isParameter(paramStr)) \ - outParamList.set(outParamStr, paramList.get(paramStr)); \ - else \ - outParamList.set(outParamStr, static_cast(defaultValue)); - -namespace MueLu { - -template -MLParameterListInterpreter::MLParameterListInterpreter(Teuchos::ParameterList& paramList, Teuchos::RCP > comm, std::vector > factoryList) - : nullspace_(NULL) - , xcoord_(NULL) - , ycoord_(NULL) - , zcoord_(NULL) - , TransferFacts_(factoryList) - , blksize_(1) { - if (paramList.isParameter("xml parameter file")) { - std::string filename = paramList.get("xml parameter file", ""); - if (filename.length() != 0) { - TEUCHOS_TEST_FOR_EXCEPTION(comm.is_null(), Exceptions::RuntimeError, "xml parameter file requires a valid comm"); - Teuchos::ParameterList paramList2 = paramList; - Teuchos::updateParametersFromXmlFileAndBroadcast(filename, Teuchos::Ptr(¶mList2), *comm); - paramList2.remove("xml parameter file"); - SetParameterList(paramList2); - } else - SetParameterList(paramList); - } else - SetParameterList(paramList); -} - -template -MLParameterListInterpreter::MLParameterListInterpreter(const std::string& xmlFileName, std::vector > factoryList) - : nullspace_(NULL) - , TransferFacts_(factoryList) - , blksize_(1) { - Teuchos::RCP paramList = Teuchos::getParametersFromXmlFile(xmlFileName); - SetParameterList(*paramList); -} - -template -void MLParameterListInterpreter::SetParameterList(const Teuchos::ParameterList& paramList_in) { - Teuchos::ParameterList paramList = paramList_in; - - // - // Read top-level of the parameter list - // - - // hard-coded default values == ML defaults according to the manual - MUELU_READ_PARAM(paramList, "ML output", int, 0, verbosityLevel); - MUELU_READ_PARAM(paramList, "max levels", int, 10, maxLevels); - MUELU_READ_PARAM(paramList, "PDE equations", int, 1, nDofsPerNode); - - MUELU_READ_PARAM(paramList, "coarse: max size", int, 128, maxCoarseSize); - - MUELU_READ_PARAM(paramList, "aggregation: type", std::string, "Uncoupled", agg_type); - // MUELU_READ_PARAM(paramList, "aggregation: threshold", double, 0.0, agg_threshold); - MUELU_READ_PARAM(paramList, "aggregation: damping factor", double, (double)4 / (double)3, agg_damping); - // MUELU_READ_PARAM(paramList, "aggregation: smoothing sweeps", int, 1, agg_smoothingsweeps); - MUELU_READ_PARAM(paramList, "aggregation: nodes per aggregate", int, 1, minPerAgg); - MUELU_READ_PARAM(paramList, "aggregation: keep Dirichlet bcs", bool, false, bKeepDirichletBcs); // This is a MueLu specific extension that does not exist in ML - MUELU_READ_PARAM(paramList, "aggregation: max neighbours already aggregated", int, 0, maxNbrAlreadySelected); // This is a MueLu specific extension that does not exist in M - MUELU_READ_PARAM(paramList, "aggregation: aux: enable", bool, false, agg_use_aux); - MUELU_READ_PARAM(paramList, "aggregation: aux: threshold", double, false, agg_aux_thresh); - - MUELU_READ_PARAM(paramList, "null space: type", std::string, "default vectors", nullspaceType); - MUELU_READ_PARAM(paramList, "null space: dimension", int, -1, nullspaceDim); // TODO: ML default not in documentation - MUELU_READ_PARAM(paramList, "null space: vectors", double*, NULL, nullspaceVec); // TODO: ML default not in documentation - - MUELU_READ_PARAM(paramList, "energy minimization: enable", bool, false, bEnergyMinimization); - - MUELU_READ_PARAM(paramList, "RAP: fix diagonal", bool, false, bFixDiagonal); // This is a MueLu specific extension that does not exist in ML - - MUELU_READ_PARAM(paramList, "x-coordinates", double*, NULL, xcoord); - MUELU_READ_PARAM(paramList, "y-coordinates", double*, NULL, ycoord); - MUELU_READ_PARAM(paramList, "z-coordinates", double*, NULL, zcoord); - - // - // Move smoothers/aggregation/coarse parameters to sublists - // - - // ML allows to have level-specific smoothers/aggregation/coarse parameters at the top level of the list or/and defined in sublists: - // See also: ML Guide section 6.4.1, MueLu::CreateSublists, ML_CreateSublists - ParameterList paramListWithSubList; - MueLu::CreateSublists(paramList, paramListWithSubList); - paramList = paramListWithSubList; // swap - - // pull out "use kokkos refactor" - bool setKokkosRefactor = false; - bool useKokkosRefactor = !Node::is_serial; - if (paramList.isType("use kokkos refactor")) { - useKokkosRefactor = paramList.get("use kokkos refactor"); - setKokkosRefactor = true; - paramList.remove("use kokkos refactor"); - } - - // - // Validate parameter list - // - - { - bool validate = paramList.get("ML validate parameter list", true); /* true = default in ML */ - if (validate) { -#if defined(HAVE_MUELU_ML) && defined(HAVE_MUELU_EPETRA) - // Validate parameter list using ML validator - int depth = paramList.get("ML validate depth", 5); /* 5 = default in ML */ - TEUCHOS_TEST_FOR_EXCEPTION(!ML_Epetra::ValidateMLPParameters(paramList, depth), Exceptions::RuntimeError, - "ERROR: ML's Teuchos::ParameterList contains incorrect parameter!"); -#else - // If no validator available: issue a warning and set parameter value to false in the output list - this->GetOStream(Warnings0) << "Warning: MueLu_ENABLE_ML=OFF. The parameter list cannot be validated." << std::endl; - paramList.set("ML validate parameter list", false); - -#endif // HAVE_MUELU_ML - } // if(validate) - } // scope - - // Matrix option - blksize_ = nDofsPerNode; - - // Translate verbosity parameter - - // Translate verbosity parameter - MsgType eVerbLevel = None; - if (verbosityLevel == 0) eVerbLevel = None; - if (verbosityLevel >= 1) eVerbLevel = Low; - if (verbosityLevel >= 5) eVerbLevel = Medium; - if (verbosityLevel >= 10) eVerbLevel = High; - if (verbosityLevel >= 11) eVerbLevel = Extreme; - if (verbosityLevel >= 42) eVerbLevel = Test; - if (verbosityLevel >= 43) eVerbLevel = InterfaceTest; - this->verbosity_ = eVerbLevel; - - TEUCHOS_TEST_FOR_EXCEPTION(agg_type != "Uncoupled", Exceptions::RuntimeError, - "MueLu::MLParameterListInterpreter::SetParameterList(): parameter \"aggregation: type\": only 'Uncoupled' aggregation is supported."); - - // Create MueLu factories - RCP dropFact; - if (useKokkosRefactor) - dropFact = rcp(new CoalesceDropFactory_kokkos()); - else - dropFact = rcp(new CoalesceDropFactory()); - - if (agg_use_aux) { - dropFact->SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); - dropFact->SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(agg_aux_thresh)); - } - - // Uncoupled aggregation - RCP AggFact = Teuchos::null; - AggFact = rcp(new UncoupledAggregationFactory()); - - AggFact->SetFactory("Graph", dropFact); - AggFact->SetFactory("DofsPerNode", dropFact); - AggFact->SetParameter("aggregation: preserve Dirichlet points", Teuchos::ParameterEntry(bKeepDirichletBcs)); - AggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); - AggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(maxNbrAlreadySelected)); - AggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(minPerAgg)); - - if (verbosityLevel > 3) { - std::ostringstream oss; - oss << "========================= Aggregate option summary  =========================" << std::endl; - oss << "min Nodes per aggregate :              " << minPerAgg << std::endl; - oss << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; - oss << "aggregate ordering :                    natural" << std::endl; - oss << "=============================================================================" << std::endl; - this->GetOStream(Runtime1) << oss.str(); - } - - RCP PFact; - RCP RFact; - RCP PtentFact; - if (useKokkosRefactor) - PtentFact = rcp(new TentativePFactory_kokkos()); - else - PtentFact = rcp(new TentativePFactory()); - if (agg_damping == 0.0 && bEnergyMinimization == false) { - // tentative prolongation operator (PA-AMG) - PFact = PtentFact; - RFact = rcp(new TransPFactory()); - } else if (agg_damping != 0.0 && bEnergyMinimization == false) { - // smoothed aggregation (SA-AMG) - RCP SaPFact = rcp(new SaPFactory()); - SaPFact->SetParameter("sa: damping factor", ParameterEntry(agg_damping)); - PFact = SaPFact; - RFact = rcp(new TransPFactory()); - } else if (bEnergyMinimization == true) { - // Petrov Galerkin PG-AMG smoothed aggregation (energy minimization in ML) - PFact = rcp(new PgPFactory()); - RFact = rcp(new GenericRFactory()); - } - - RCP AcFact = rcp(new RAPFactory()); - AcFact->SetParameter("RepairMainDiagonal", Teuchos::ParameterEntry(bFixDiagonal)); - for (size_t i = 0; i < TransferFacts_.size(); i++) { - AcFact->AddTransferFactory(TransferFacts_[i]); - } - - // - // introduce rebalancing - // -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - Teuchos::RCP RebalancedPFact = Teuchos::null; - Teuchos::RCP RebalancedRFact = Teuchos::null; - Teuchos::RCP RepartitionFact = Teuchos::null; - Teuchos::RCP RebalancedAFact = Teuchos::null; - - MUELU_READ_PARAM(paramList, "repartition: enable", int, 0, bDoRepartition); - if (bDoRepartition == 1) { - // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default. - // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory. - RFact->SetFactory("P", PFact); - // - AcFact->SetFactory("P", PFact); - AcFact->SetFactory("R", RFact); - - // define rebalancing factory for coarse matrix - Teuchos::RCP > rebAmalgFact = Teuchos::rcp(new MueLu::AmalgamationFactory()); - rebAmalgFact->SetFactory("A", AcFact); - - MUELU_READ_PARAM(paramList, "repartition: max min ratio", double, 1.3, maxminratio); - MUELU_READ_PARAM(paramList, "repartition: min per proc", int, 512, minperproc); - - // Repartitioning heuristic - RCP RepartitionHeuristicFact = Teuchos::rcp(new RepartitionHeuristicFactory()); - { - Teuchos::ParameterList paramListRepFact; - paramListRepFact.set("repartition: min rows per proc", minperproc); - paramListRepFact.set("repartition: max imbalance", maxminratio); - RepartitionHeuristicFact->SetParameterList(paramListRepFact); - } - RepartitionHeuristicFact->SetFactory("A", AcFact); - - // create "Partition" - Teuchos::RCP > isoInterface = Teuchos::rcp(new MueLu::IsorropiaInterface()); - isoInterface->SetFactory("A", AcFact); - isoInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - isoInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); - - // create "Partition" by unamalgamtion - Teuchos::RCP > repInterface = Teuchos::rcp(new MueLu::RepartitionInterface()); - repInterface->SetFactory("A", AcFact); - repInterface->SetFactory("number of partitions", RepartitionHeuristicFact); - repInterface->SetFactory("AmalgamatedPartition", isoInterface); - // repInterface->SetFactory("UnAmalgamationInfo", rebAmalgFact); // not necessary? - - // Repartitioning (creates "Importer" from "Partition") - RepartitionFact = Teuchos::rcp(new RepartitionFactory()); - RepartitionFact->SetFactory("A", AcFact); - RepartitionFact->SetFactory("number of partitions", RepartitionHeuristicFact); - RepartitionFact->SetFactory("Partition", repInterface); - - // Reordering of the transfer operators - RebalancedPFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation"))); - RebalancedPFact->SetFactory("P", PFact); - RebalancedPFact->SetFactory("Nullspace", PtentFact); - RebalancedPFact->SetFactory("Importer", RepartitionFact); - - RebalancedRFact = Teuchos::rcp(new RebalanceTransferFactory()); - RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction"))); - RebalancedRFact->SetFactory("R", RFact); - RebalancedRFact->SetFactory("Importer", RepartitionFact); - - // Compute Ac from rebalanced P and R - RebalancedAFact = Teuchos::rcp(new RebalanceAcFactory()); - RebalancedAFact->SetFactory("A", AcFact); - } -#else // #ifdef HAVE_MUELU_ISORROPIA - // Get rid of [-Wunused] warnings - //(void) - // - // ^^^ FIXME (mfh 17 Nov 2013) That definitely doesn't compile. -#endif - - // - // Nullspace factory - // - - // Set fine level nullspace - // extract pre-computed nullspace from ML parameter list - // store it in nullspace_ and nullspaceDim_ - if (nullspaceType != "default vectors") { - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceType != "pre-computed", Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (no pre-computed null space). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceDim == -1, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace dim == -1). error."); - TEUCHOS_TEST_FOR_EXCEPTION(nullspaceVec == NULL, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no valid nullspace (nullspace == NULL). You have to provide a valid fine-level nullspace in \'null space: vectors\'"); - - nullspaceDim_ = nullspaceDim; - nullspace_ = nullspaceVec; - } - - Teuchos::RCP nspFact = Teuchos::rcp(new NullspaceFactory("Nullspace")); - nspFact->SetFactory("Nullspace", PtentFact); - - // Stash coordinates - xcoord_ = xcoord; - ycoord_ = ycoord; - zcoord_ = zcoord; - - // - // Hierarchy + FactoryManager - // - - // Hierarchy options - this->numDesiredLevel_ = maxLevels; - this->maxCoarseSize_ = maxCoarseSize; - - // - // Coarse Smoother - // - ParameterList& coarseList = paramList.sublist("coarse: list"); - // check whether coarse solver is set properly. If not, set default coarse solver. - if (!coarseList.isParameter("smoother: type")) - coarseList.set("smoother: type", "Amesos-KLU"); // set default coarse solver according to ML 5.0 guide - RCP coarseFact = GetSmootherFactory(coarseList, Teuchos::null); - - // Smoothers Top Level Parameters - - RCP topLevelSmootherParam = ExtractSetOfParameters(paramList, "smoother"); - - // - - // Prepare factory managers - // TODO: smootherFact can be reuse accross level if same parameters/no specific parameterList - - for (int levelID = 0; levelID < maxLevels; levelID++) { - // - // Level FactoryManager - // - - RCP manager = rcp(new FactoryManager()); - if (setKokkosRefactor) - manager->SetKokkosRefactor(useKokkosRefactor); - - // - // Smoothers - // - - { - // Merge level-specific parameters with global parameters. level-specific parameters takes precedence. - // TODO: unit-test this part alone - - ParameterList levelSmootherParam = GetMLSubList(paramList, "smoother", levelID); // copy - MergeParameterList(*topLevelSmootherParam, levelSmootherParam, false); /* false = do no overwrite levelSmootherParam parameters by topLevelSmootherParam parameters */ - // std::cout << std::endl << "Merged List for level " << levelID << std::endl; - // std::cout << levelSmootherParam << std::endl; - - RCP smootherFact = GetSmootherFactory(levelSmootherParam, Teuchos::null); // TODO: missing AFact input arg. - - manager->SetFactory("Smoother", smootherFact); - } - - // - // Misc - // - - manager->SetFactory("CoarseSolver", coarseFact); // TODO: should not be done in the loop - manager->SetFactory("Graph", dropFact); - manager->SetFactory("Aggregates", AggFact); - manager->SetFactory("DofsPerNode", dropFact); - manager->SetFactory("Ptent", PtentFact); - -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - if (bDoRepartition == 1) { - manager->SetFactory("A", RebalancedAFact); - manager->SetFactory("P", RebalancedPFact); - manager->SetFactory("R", RebalancedRFact); - manager->SetFactory("Nullspace", RebalancedPFact); - manager->SetFactory("Importer", RepartitionFact); - } else { -#endif // #ifdef HAVE_MUELU_ISORROPIA - manager->SetFactory("Nullspace", nspFact); // use same nullspace factory throughout all multigrid levels - manager->SetFactory("A", AcFact); // same RAP factory for all levels - manager->SetFactory("P", PFact); // same prolongator and restrictor factories for all levels - manager->SetFactory("R", RFact); // same prolongator and restrictor factories for all levels -#if defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_MPI) - } -#endif - - this->AddFactoryManager(levelID, 1, manager); - } // for (level loop) -} - -template -void MLParameterListInterpreter::SetupHierarchy(Hierarchy& H) const { - // if nullspace_ has already been extracted from ML parameter list - // make nullspace available for MueLu - if (nullspace_ != NULL) { - RCP fineLevel = H.GetLevel(0); - RCP Op = fineLevel->Get >("A"); - RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const RCP rowMap = fineLevel->Get >("A")->getRowMap(); - RCP nullspace = MultiVectorFactory::Build(rowMap, nullspaceDim_, true); - - for (size_t i = 0; i < Teuchos::as(nullspaceDim_); i++) { - Teuchos::ArrayRCP nullspacei = nullspace->getDataNonConst(i); - const size_t myLength = nullspace->getLocalLength(); - - for (size_t j = 0; j < myLength; j++) { - nullspacei[j] = nullspace_[i * myLength + j]; - } - } - - fineLevel->Set("Nullspace", nullspace); - } - } - - // Do the same for coordinates - size_t num_coords = 0; - double* coordPTR[3]; - if (xcoord_) { - coordPTR[0] = xcoord_; - num_coords++; - if (ycoord_) { - coordPTR[1] = ycoord_; - num_coords++; - if (zcoord_) { - coordPTR[2] = zcoord_; - num_coords++; - } - } - } - if (num_coords) { - Teuchos::RCP fineLevel = H.GetLevel(0); - Teuchos::RCP Op = fineLevel->Get >("A"); - Teuchos::RCP A = rcp_dynamic_cast(Op); - if (!A.is_null()) { - const Teuchos::RCP rowMap = fineLevel->Get >("A")->getRowMap(); - Teuchos::RCP coordinates = MultiVectorFactory::Build(rowMap, num_coords, true); - - for (size_t i = 0; i < num_coords; i++) { - Teuchos::ArrayRCP coordsi = coordinates->getDataNonConst(i); - const size_t myLength = coordinates->getLocalLength(); - for (size_t j = 0; j < myLength; j++) { - coordsi[j] = coordPTR[i][j]; - } - } - fineLevel->Set("Coordinates", coordinates); - } - } - - HierarchyManager::SetupHierarchy(H); -} - -// TODO: code factorization with MueLu_ParameterListInterpreter. -template -RCP > -MLParameterListInterpreter:: - GetSmootherFactory(const Teuchos::ParameterList& paramList, - const RCP& AFact) { - typedef Teuchos::ScalarTraits STS; - SC one = STS::one(); - - std::string type = "symmetric Gauss-Seidel"; // default - - // - // Get 'type' - // - - // //TODO: fix defaults!! - - // // Default coarse grid smoother - // std::string type; - // if ("smoother" == "coarse") { - // #if (defined(HAVE_MUELU_EPETRA) && defined( HAVE_MUELU_AMESOS)) || (defined(HAVE_MUELU_AMESOS2)) // FIXME: test is wrong (ex: compiled with Epetra&&Tpetra&&Amesos2 but without Amesos => error running Epetra problem) - // type = ""; // use default defined by AmesosSmoother or Amesos2Smoother - // #else - // type = "symmetric Gauss-Seidel"; // use a sym Gauss-Seidel (with no damping) as fallback "coarse solver" (TODO: needs Ifpack(2)) - // #endif - // } else { - // // TODO: default smoother? - // type = ""; - // } - - if (paramList.isParameter("smoother: type")) type = paramList.get("smoother: type"); - TEUCHOS_TEST_FOR_EXCEPTION(type.empty(), Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no \"smoother: type\" in the smoother parameter list" << std::endl - << paramList); - - // - // Create the smoother prototype - // - - RCP smooProto; - std::string ifpackType; - Teuchos::ParameterList smootherParamList; - - if (type == "Jacobi" || type == "Gauss-Seidel" || type == "symmetric Gauss-Seidel") { - if (type == "symmetric Gauss-Seidel") type = "Symmetric Gauss-Seidel"; // FIXME - - ifpackType = "RELAXATION"; - smootherParamList.set("relaxation: type", type); - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "smoother: damping factor", Scalar, one, smootherParamList, "relaxation: damping factor"); - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "Chebyshev" || type == "MLS") { - ifpackType = "CHEBYSHEV"; - - MUELU_COPY_PARAM(paramList, "smoother: sweeps", int, 2, smootherParamList, "chebyshev: degree"); - if (paramList.isParameter("smoother: MLS alpha")) { - MUELU_COPY_PARAM(paramList, "smoother: MLS alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "smoother: Chebyshev alpha", double, 20, smootherParamList, "chebyshev: ratio eigenvalue"); - } - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "Hiptmair") { - ifpackType = "HIPTMAIR"; - std::string subSmootherType = "Chebyshev"; - if (paramList.isParameter("subsmoother: type")) - subSmootherType = paramList.get("subsmoother: type"); - std::string subSmootherIfpackType; - if (subSmootherType == "Chebyshev") - subSmootherIfpackType = "CHEBYSHEV"; - else if (subSmootherType == "Jacobi" || subSmootherType == "Gauss-Seidel" || subSmootherType == "symmetric Gauss-Seidel") { - if (subSmootherType == "symmetric Gauss-Seidel") subSmootherType = "Symmetric Gauss-Seidel"; // FIXME - subSmootherIfpackType = "RELAXATION"; - } else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << subSmootherType << "' not supported by MueLu."); - - smootherParamList.set("hiptmair: smoother type 1", subSmootherIfpackType); - smootherParamList.set("hiptmair: smoother type 2", subSmootherIfpackType); - - auto smoother1ParamList = smootherParamList.sublist("hiptmair: smoother list 1"); - auto smoother2ParamList = smootherParamList.sublist("hiptmair: smoother list 2"); - - if (subSmootherType == "Chebyshev") { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "chebyshev: degree"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "chebyshev: degree"); - - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother1ParamList, "chebyshev: ratio eigenvalue"); - MUELU_COPY_PARAM(paramList, "subsmoother: Chebyshev", double, 20, smoother2ParamList, "chebyshev: ratio eigenvalue"); - } else { - MUELU_COPY_PARAM(paramList, "subsmoother: edge sweeps", int, 2, smoother1ParamList, "relaxation: sweeps"); - MUELU_COPY_PARAM(paramList, "subsmoother: node sweeps", int, 2, smoother2ParamList, "relaxation: sweeps"); - - MUELU_COPY_PARAM(paramList, "subsmoother: SGS damping factor", double, 0.8, smoother2ParamList, "relaxation: damping factor"); - } - - smooProto = rcp(new TrilinosSmoother(ifpackType, smootherParamList, 0)); - smooProto->SetFactory("A", AFact); - - } else if (type == "IFPACK") { // TODO: this option is not described in the ML Guide v5.0 - -#if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_IFPACK) - ifpackType = paramList.get("smoother: ifpack type"); - - if (ifpackType == "ILU") { - // TODO fix this (type mismatch double vs. int) - // MUELU_COPY_PARAM(paramList, "smoother: ifpack level-of-fill", double /*int*/, 0.0 /*2*/, smootherParamList, "fact: level-of-fill"); - if (paramList.isParameter("smoother: ifpack level-of-fill")) - smootherParamList.set("fact: level-of-fill", Teuchos::as(paramList.get("smoother: ifpack level-of-fill"))); - else - smootherParamList.set("fact: level-of-fill", as(0)); - - MUELU_COPY_PARAM(paramList, "smoother: ifpack overlap", int, 2, smootherParamList, "partitioner: overlap"); - - // TODO change to TrilinosSmoother as soon as Ifpack2 supports all preconditioners from Ifpack - smooProto = - MueLu::GetIfpackSmoother(ifpackType, - smootherParamList, - paramList.get("smoother: ifpack overlap")); - smooProto->SetFactory("A", AFact); - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown ML smoother type " + type + " (IFPACK) not supported by MueLu. Only ILU is supported."); - } -#else - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: MueLu compiled without Ifpack support"); -#endif - - } else if (type.length() > strlen("Amesos") && type.substr(0, strlen("Amesos")) == "Amesos") { /* catch Amesos-* */ - std::string solverType = type.substr(strlen("Amesos") + 1); /* ("Amesos-KLU" -> "KLU") */ - - // Validator: following upper/lower case is what is allowed by ML - bool valid = false; - const int validatorSize = 5; - std::string validator[validatorSize] = {"Superlu", "Superludist", "KLU", "UMFPACK", "MUMPS"}; /* TODO: should "" be allowed? */ - for (int i = 0; i < validatorSize; i++) { - if (validator[i] == solverType) valid = true; - } - TEUCHOS_TEST_FOR_EXCEPTION(!valid, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported."); - - // FIXME: MueLu should accept any Upper/Lower case. Not the case for the moment - std::transform(solverType.begin() + 1, solverType.end(), solverType.begin() + 1, ::tolower); - - smooProto = Teuchos::rcp(new DirectSolver(solverType, Teuchos::ParameterList())); - smooProto->SetFactory("A", AFact); - - } else { - TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: unknown smoother type. '" << type << "' not supported by MueLu."); - } - TEUCHOS_TEST_FOR_EXCEPTION(smooProto == Teuchos::null, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter: no smoother prototype. fatal error."); - - // - // Create the smoother factory - // - - RCP SmooFact = rcp(new SmootherFactory()); - - // Set parameters of the smoother factory - MUELU_READ_PARAM(paramList, "smoother: pre or post", std::string, "both", preOrPost); - if (preOrPost == "both") { - SmooFact->SetSmootherPrototypes(smooProto, smooProto); - } else if (preOrPost == "pre") { - SmooFact->SetSmootherPrototypes(smooProto, Teuchos::null); - } else if (preOrPost == "post") { - SmooFact->SetSmootherPrototypes(Teuchos::null, smooProto); - } - - return SmooFact; -} - -template -void MLParameterListInterpreter::AddTransferFactory(const RCP& factory) { - // check if it's a TwoLevelFactoryBase based transfer factory - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast(factory) == Teuchos::null, Exceptions::BadCast, "Transfer factory is not derived from TwoLevelFactoryBase. Since transfer factories will be handled by the RAPFactory they have to be derived from TwoLevelFactoryBase!"); - TransferFacts_.push_back(factory); -} - -template -size_t MLParameterListInterpreter::NumTransferFactories() const { - return TransferFacts_.size(); -} - -template -void MLParameterListInterpreter::SetupOperator(Operator& Op) const { - try { - Matrix& A = dynamic_cast(Op); - if (A.IsFixedBlockSizeSet() && (A.GetFixedBlockSize() != blksize_)) - this->GetOStream(Warnings0) << "Setting matrix block size to " << blksize_ << " (value of the parameter in the list) " - << "instead of " << A.GetFixedBlockSize() << " (provided matrix)." << std::endl; - - A.SetFixedBlockSize(blksize_); - -#ifdef HAVE_MUELU_DEBUG - MatrixUtils::checkLocalRowMapMatchesColMap(A); -#endif // HAVE_MUELU_DEBUG - - } catch (std::bad_cast&) { - this->GetOStream(Warnings0) << "Skipping setting block size as the operator is not a matrix" << std::endl; - } -} - -} // namespace MueLu - -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif /* MUELU_MLPARAMETERLISTINTERPRETER_DEF_HPP */ - -// TODO: see if it can be factorized with ML interpreter (ex: generation of Ifpack param list) diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp deleted file mode 100644 index 82fd49a91699..000000000000 --- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_decl.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP - -#include "MueLu_NullspaceFactory.hpp" - -namespace MueLu { - -template -class [[deprecated]] NullspaceFactory_kokkos : public NullspaceFactory {}; - -} // namespace MueLu - -#define MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_DECL_HPP diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp deleted file mode 100644 index bad8376962c7..000000000000 --- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_NullspaceFactory_kokkos_def.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP - -#include "MueLu_NullspaceFactory_kokkos_decl.hpp" - -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList index fb5f2e6315cd..c606287c8edf 100644 --- a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList +++ b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList @@ -64,11 +64,9 @@ MapTransferFactory MatrixAnalysisFactory MergedBlockedMatrixFactory MergedSmoother -MLParameterListInterpreter - #if defined(HAVE_MUELU_DEPRECATED_CODE) MultiVectorTransferFactory NotayAggregationFactory NullspaceFactory -NullspaceFactory_kokkos - #if defined(HAVE_MUELU_DEPRECATED_CODE) NullspacePresmoothFactory ParameterListInterpreter PatternFactory diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake index 6ce2a712f4fb..20dc4095f4f0 100644 --- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake +++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake @@ -63,11 +63,9 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MapTransferFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MatrixAnalysisFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MergedBlockedMatrixFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MergedSmoother ) -APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MLParameterListInterpreter-.?if.defined[HAVE_MUELU_DEPRECATED_CODE] ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MultiVectorTransferFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NotayAggregationFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspaceFactory ) -APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspaceFactory_kokkos-.?if.defined[HAVE_MUELU_DEPRECATED_CODE] ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::NullspacePresmoothFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::ParameterListInterpreter ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::PatternFactory ) diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp deleted file mode 100644 index 004cb8991843..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListIntepreter_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP -#define MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class MLParameterListIntepreter; -} - -#ifndef MUELU_MLPARAMETERLISTINTEPRETER_SHORT -#define MUELU_MLPARAMETERLISTINTEPRETER_SHORT -#endif - -#endif - -#endif // MUELU_MLPARAMETERLISTINTEPRETER_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp deleted file mode 100644 index a30343f06ed3..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MLParameterListInterpreter_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP -#define MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class MLParameterListInterpreter; -} - -#ifndef MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#define MUELU_MLPARAMETERLISTINTERPRETER_SHORT -#endif - -#endif - -#endif // MUELU_MLPARAMETERLISTINTERPRETER_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp deleted file mode 100644 index 805c7ee1970f..000000000000 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_NullspaceFactory_kokkos_fwd.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// @HEADER -// ***************************************************************************** -// MueLu: A package for multigrid based preconditioning -// -// Copyright 2012 NTESS and the MueLu contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP -#define MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP - -#include "MueLu_ConfigDefs.hpp" -#if defined(HAVE_MUELU_DEPRECATED_CODE) - -namespace MueLu { -template -class NullspaceFactory_kokkos; -} - -#ifndef MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#define MUELU_NULLSPACEFACTORY_KOKKOS_SHORT -#endif - -#endif - -#endif // MUELU_NULLSPACEFACTORY_KOKKOS_FWD_HPP diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 57018cd0b047..f99253326a84 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -127,9 +127,9 @@ class UtilitiesBase { * @ret: vector containing max_{i\not=k}(-a_ik) */ - static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); + static Teuchos::RCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); - static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); + static Teuchos::RCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); /*! @brief Return vector containing inverse of input vector * diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp index 47db9ba0635f..f6e1740d47fe 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp @@ -584,12 +584,12 @@ UtilitiesBase:: } template -Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> +Teuchos::RCP> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A) { // Get/Create distributed objects RCP rowMap = A.getRowMap(); - auto diag = Xpetra::VectorFactory::Build(rowMap, false); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); // Implement using Kokkos using local_vector_type = typename Vector::dual_view_type::t_dev_um; @@ -597,10 +597,7 @@ UtilitiesBase:: using execution_space = typename local_vector_type::execution_space; using values_type = typename local_matrix_type::values_type; using scalar_type = typename values_type::non_const_value_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; using KAT_S = typename Kokkos::ArithTraits; - using KAT_M = typename Kokkos::ArithTraits; - using size_type = typename local_matrix_type::non_const_size_type; auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); auto local_mat_dev = A.getLocalMatrixDevice(); @@ -609,11 +606,12 @@ UtilitiesBase:: Kokkos::parallel_for( "GetMatrixMaxMinusOffDiagonal", my_policy, KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { - auto mymax = KAT_M::zero(); - auto row = local_mat_dev.row(rowIdx); + auto mymax = KAT_S::zero(); + auto row = local_mat_dev.rowConst(rowIdx); for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { if (rowIdx != row.colidx(entryIdx)) { - mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx))) + mymax = -KAT_S::real(row.value(entryIdx)); } } diag_dev(rowIdx, 0) = mymax; @@ -623,14 +621,14 @@ UtilitiesBase:: } template -Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> +Teuchos::RCP> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber) { TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map."); // Get/Create distributed objects RCP rowMap = A.getRowMap(); - auto diag = Xpetra::VectorFactory::Build(rowMap, false); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); // Implement using Kokkos using local_vector_type = typename Vector::dual_view_type::t_dev_um; @@ -638,10 +636,7 @@ UtilitiesBase:: using execution_space = typename local_vector_type::execution_space; using values_type = typename local_matrix_type::values_type; using scalar_type = typename values_type::non_const_value_type; - using mag_type = typename Kokkos::ArithTraits::mag_type; using KAT_S = typename Kokkos::ArithTraits; - using KAT_M = typename Kokkos::ArithTraits; - using size_type = typename local_matrix_type::non_const_size_type; auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); auto local_mat_dev = A.getLocalMatrixDevice(); @@ -651,11 +646,12 @@ UtilitiesBase:: Kokkos::parallel_for( "GetMatrixMaxMinusOffDiagonal", my_policy, KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { - auto mymax = KAT_M::zero(); + auto mymax = KAT_S::zero(); auto row = local_mat_dev.row(rowIdx); for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) { - mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx))) + mymax = -KAT_S::real(row.value(entryIdx)); } } diag_dev(rowIdx, 0) = mymax; diff --git a/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold index 0f01f3917fe2..326fbeecded2 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLaux_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.01, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.01, blocksize = 1 aggregation: drop tol = 0.01 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -26,6 +23,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold index a59513adfcff..9c00a4283072 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold index 9086c4e5f506..07d8543bfbed 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -155,9 +155,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -166,6 +164,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold index d308a4bc6af2..870b0445b626 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold index 6d6f4cc04a6b..24007fde4aa4 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse4_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold index cac568f06868..1bf1ac6facd2 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLcoarse5_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold index 44b6284e32d9..522a4d33c19a 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLpgamg1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -52,9 +52,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -63,6 +61,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -93,9 +93,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -104,6 +102,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -134,9 +134,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -145,6 +143,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold index 5073aefe576d..ac01d63b4a50 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning1_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -114,9 +114,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -125,6 +123,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -165,9 +165,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -176,6 +174,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -216,9 +216,7 @@ Level 5 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -227,6 +225,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold index 3eeaf2a6528e..6429fba26897 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning2_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -76,6 +74,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -118,9 +118,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -129,6 +127,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -171,9 +171,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -182,6 +180,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold index 69572cc64d41..816a2cbc7698 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLrepartitioning3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -66,9 +66,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -77,6 +75,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -120,9 +120,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -131,6 +129,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -174,9 +174,7 @@ Level 4 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -185,6 +183,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold index d308a4bc6af2..870b0445b626 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold index bb50b5fdb518..969a8a8bfac9 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold index 835095475959..778384215364 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold index 3757339a77df..a0e7ad721409 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLsmoother4_tpetra.gold @@ -12,9 +12,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -60,6 +58,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -86,9 +86,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -97,6 +95,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -123,9 +123,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -134,6 +132,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold index 66cce4e4b90b..4f7d5d9e2c95 100644 --- a/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/MLunsmoothed1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -83,9 +83,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -94,6 +92,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -119,9 +119,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -130,6 +128,8 @@ BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) aggregation: match ML phase2a = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold index 1e9cdc73774e..d42eadfdc6b8 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation3_tpetra.gold @@ -11,10 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = classical Build (MueLu::TentativePFactory_kokkos) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,10 +46,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = classical Build (MueLu::TentativePFactory_kokkos) @@ -60,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold index 57cf2ec15f45..26b3a933d761 100644 --- a/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/aggregation4_tpetra.gold @@ -11,10 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.05, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.05, blocksize = 1 aggregation: drop tol = 0.05 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -50,10 +49,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.05, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.05, blocksize = 1 aggregation: drop tol = 0.05 aggregation: drop scheme = distance laplacian Build (MueLu::TentativePFactory_kokkos) @@ -63,6 +59,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold index b289d035dda9..c8c2fb4e0d13 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/coarse1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -77,9 +77,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -87,6 +85,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold index 2226b1e34413..986986f65002 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/coarse2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -77,9 +77,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -87,6 +85,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -110,9 +110,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -120,6 +118,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -143,9 +143,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -153,6 +151,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold index bed828c7df82..fc02273c3c0c 100644 --- a/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/coarse3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold index 591dfd5fe59e..11f55d4ccc24 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_e3d_tpetra.gold @@ -12,7 +12,7 @@ Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) Build (MueLu::AmalgamationFactory) [empty list] -algorithm = "classical": threshold = 0, blocksize = 3 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 3 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -44,7 +44,7 @@ Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) Build (MueLu::AmalgamationFactory) [empty list] -algorithm = "classical": threshold = 0, blocksize = 3 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 3 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) diff --git a/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold index 9c279a7330a1..4ec58304e539 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_mhd_np4_tpetra.gold @@ -18,14 +18,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -53,14 +53,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold index 2725d881cea0..2d1901c6ce6a 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_mhd_tpetra.gold @@ -18,14 +18,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -53,14 +53,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_p2d_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_p3d_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold index bd7153246283..e131cc4fc673 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_pg_np4_tpetra.gold @@ -11,14 +11,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,14 +42,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold index f926861f5ace..d4a3486fd56e 100644 --- a/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/default_pg_tpetra.gold @@ -11,14 +11,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,14 +42,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold index c56ac4a46dcb..b5df153ebf4f 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_np4_tpetra.gold @@ -15,11 +15,9 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -27,6 +25,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -97,11 +97,9 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -109,6 +107,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -154,11 +154,9 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -166,6 +164,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold index c8225b994621..3c6c53c67518 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar1_tpetra.gold @@ -15,11 +15,9 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -27,6 +25,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -72,11 +72,9 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -84,6 +82,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -129,11 +129,9 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0, blocksize = 1 +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0, blocksize = 1 aggregation: drop scheme = distance laplacian -filtered matrix: use lumping = 1 [unused] +filtered matrix: use lumping = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -141,6 +139,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold index e195f2265097..1294cbc43d2d 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_np4_tpetra.gold @@ -15,10 +15,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -29,6 +26,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -99,10 +98,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -113,6 +109,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -158,10 +156,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -172,6 +167,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold index d9166a4a3f3a..2e79c854a41b 100644 --- a/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/driver_drekar2_tpetra.gold @@ -15,10 +15,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -29,6 +26,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -74,10 +73,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -88,6 +84,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -133,10 +131,7 @@ Level 3 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "distance laplacian": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "distance laplacian" distance laplacian algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 aggregation: drop scheme = distance laplacian filtered matrix: use lumping = 1 @@ -147,6 +142,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold index 87d98e6bbd71..558c631efc00 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -47,9 +47,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -57,6 +55,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold index d3d700a672ee..b0a6f7083c6f 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -59,6 +57,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold index beb9069dcbef..1c426729b397 100644 --- a/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/emin3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -49,9 +49,7 @@ smoother -> Level 2 Prolongator minimization (MueLu::EminPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -59,6 +57,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/empty_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold index 186ca4496970..180f89d95e9a 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -45,9 +45,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -56,6 +54,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -79,9 +79,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -90,6 +88,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold index df9604a89b67..8259cc104ae8 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_1_np4_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -45,9 +45,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -56,6 +54,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -79,9 +79,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -90,6 +88,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold index 0b51e98970e5..aed4bdcd30f8 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np1_tpetra.gold @@ -22,9 +22,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -33,6 +31,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -54,9 +54,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold index baf6c048c339..eee74662e1a3 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_5_np4_tpetra.gold @@ -22,9 +22,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -33,6 +31,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -54,9 +54,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold index 49df428e7ac1..8abd2dd54f54 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np1_tpetra.gold @@ -26,9 +26,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -37,6 +35,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -59,9 +59,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold index 9fb6a3101a8b..50662700a85d 100644 --- a/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/operator_solve_6_np4_tpetra.gold @@ -26,9 +26,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -37,6 +35,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] @@ -59,9 +59,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesDeterministic (Phase 2a (secondary)) BuildAggregatesDeterministic (Phase 2b (expansion)) BuildAggregatesDeterministic (Phase 3 (cleanup)) aggregation: deterministic = 1 [unused] +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold index 5cd915b67772..3993d8aa3e64 100644 --- a/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/pg1_tpetra.gold @@ -13,14 +13,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -46,14 +46,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold index 5cd915b67772..3993d8aa3e64 100644 --- a/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/pg2_tpetra.gold @@ -13,14 +13,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -46,14 +46,14 @@ Prolongator smoothing (PG-AMG) (MueLu::PgPFactory) Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold index bb19eb33551c..1eb2e9c6fcbb 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition1_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold index 412bfdae6761..d849a7220890 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition1_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold index 40cd4782bdd0..d01d4cca3d90 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -61,9 +61,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -71,6 +69,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold index 8848a605bf3d..0ded152ab26c 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -61,9 +61,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -71,6 +69,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold index ef7d15a75400..d58c71ed6aa0 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition4_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -75,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold index 53f8ec14732c..a34e807605c5 100644 --- a/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/repartition4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,9 +65,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -75,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold index 6bd1e19b0132..6b8e82af56eb 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold index ddb6f2cda9c7..c1825742b828 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold index 35f645ce54df..e9be96c407cb 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold index 4c6bec2de826..2e276c11f43b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RAP-2_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold index f7b528b12da7..eb674410b28a 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold index c22abad73532..f41fd267e41c 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-RP-2_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -58,9 +58,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -68,6 +66,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold index 0ce74043c745..d8594818fb07 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -148,6 +146,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -189,9 +189,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -199,6 +197,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold index 66b3846fcb50..1439c208a74b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-S-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -145,9 +145,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -155,6 +153,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -196,9 +196,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -206,6 +204,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold index 3e15b97cf0e3..4c007cccf69c 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold index df57cead5b10..3317c950e7c3 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-full-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold index c567d971cec7..353184cc8755 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-none_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -133,9 +133,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -143,6 +141,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -181,9 +181,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -191,6 +189,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold index 4898e6dcfb70..202736e00075 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-none_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -60,9 +60,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -70,6 +68,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -137,9 +137,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -147,6 +145,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -185,9 +185,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -195,6 +193,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold index 931773684060..6c75ad79682b 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_np4_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -140,9 +140,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -178,9 +176,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold index 283a59d925f1..df8c695af7ae 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-1_tpetra.gold @@ -13,9 +13,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -64,9 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -74,6 +72,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -145,9 +145,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -183,9 +181,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold index 99ad1a23b744..a041df517103 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_np4_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,10 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -77,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -142,10 +140,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 matrixmatrix: kernel params -> [empty list] @@ -179,10 +174,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -191,6 +183,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold index 0e209db92aa3..51e717fb85e2 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-2_tpetra.gold @@ -12,10 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -24,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -65,10 +64,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -77,6 +73,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -146,10 +144,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 matrixmatrix: kernel params -> [empty list] @@ -183,10 +178,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0.02, blocksize = 1 -Lumping dropped entries +algorithm = "classical" classical algorithm = "default": threshold = 0.02, blocksize = 1 aggregation: drop tol = 0.02 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) @@ -195,6 +187,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold index ba9828cca88e..ad14ee9d5d04 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -73,6 +71,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -176,9 +174,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -186,6 +182,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold index 41e831b05ba1..0d822dd954f3 100644 --- a/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/reuse-tP-3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -63,9 +63,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -73,6 +71,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -142,9 +142,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Reusing previous AP data matrixmatrix: kernel params -> [empty list] @@ -180,9 +178,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -190,6 +186,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold index 454794b53db2..6963611ce60b 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother10_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold index 23fd32664480..4818fdac660d 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother11_tpetra.gold @@ -15,9 +15,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -25,6 +23,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -52,9 +52,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -62,6 +60,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold index 04ab2a78cbab..5ff00c17a796 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother12_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -74,9 +74,7 @@ smoother -> Level 3 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -84,6 +82,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -106,9 +106,7 @@ smoother -> Level 4 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -116,6 +114,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -138,9 +138,7 @@ smoother -> Level 5 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -148,6 +146,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold index 69e737694215..5bd1ac2ab59a 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother13_tpetra.gold @@ -13,9 +13,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -48,9 +48,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold index 170df5b0a425..cad2c1d4b2b2 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother2_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold index 68e878d15b39..fc4d2af803e1 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother3_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold index 72e5138c42a8..592fcfd8912b 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother4_tpetra.gold @@ -5,9 +5,7 @@ Level 0 Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -15,6 +13,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -32,9 +32,7 @@ matrixmatrix: kernel params -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -42,6 +40,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold index a3f2ccc308e9..2e0353b4237f 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother5_tpetra.gold @@ -10,9 +10,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -20,6 +18,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold index 010f9135f9ac..0048ce280929 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother6_tpetra.gold @@ -8,9 +8,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -18,6 +16,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -38,9 +38,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -48,6 +46,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold index 2a697355b847..572bc3e1a866 100644 --- a/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/smoother9_tpetra.gold @@ -13,9 +13,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -23,6 +21,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -48,9 +48,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -58,6 +56,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold index 3e08856bd6cc..07de05bb7aea 100644 --- a/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/sync1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,9 +44,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -54,6 +52,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold index 7bee44217696..a4d7d7de12af 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose1_tpetra.gold @@ -11,9 +11,7 @@ smoother -> Level 1 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -21,6 +19,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -42,9 +42,7 @@ smoother -> Level 2 Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -52,6 +50,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold index 37971dbc7010..b75a9ec83e86 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose2_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -55,9 +55,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold index 97aa95528902..985be2cd47c5 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose2_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -55,9 +55,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -65,6 +63,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold index 823740eaa78e..d621f56e3626 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose3_np4_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -56,9 +56,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -66,6 +64,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold index 899fdd29a9f5..6927436a6f9c 100644 --- a/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/transpose3_tpetra.gold @@ -12,9 +12,7 @@ Level 1 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -22,6 +20,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -56,9 +56,7 @@ Level 2 Build (MueLu::RebalanceTransferFactory) Prolongator smoothing (MueLu::SaPFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) BuildAggregates (Phase - (Dirichlet)) @@ -66,6 +64,8 @@ BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold index 039847881f42..ec9647a3dbc1 100644 --- a/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/unsmoothed1_tpetra.gold @@ -12,14 +12,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -43,14 +43,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold b/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold index 861f191ab2a4..d2d9cc6349d2 100644 --- a/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold +++ b/packages/muelu/test/interface/kokkos/Output/unsmoothed2_tpetra.gold @@ -12,14 +12,14 @@ Level 1 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) @@ -44,14 +44,14 @@ Level 2 Build (MueLu::TentativePFactory_kokkos) Build (MueLu::UncoupledAggregationFactory) Build (MueLu::CoalesceDropFactory_kokkos) -Build (MueLu::AmalgamationFactory) -[empty list] -algorithm = "classical": threshold = 0, blocksize = 1 +algorithm = "classical" classical algorithm = "default": threshold = 0, blocksize = 1 BuildAggregates (Phase - (Dirichlet)) BuildAggregatesRandom (Phase 1 (main)) BuildAggregatesRandom (Phase 2a (secondary)) BuildAggregatesRandom (Phase 2b (expansion)) BuildAggregatesRandom (Phase 3 (cleanup)) +Build (MueLu::AmalgamationFactory) +[empty list] Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace Build (MueLu::CoarseMapFactory) diff --git a/packages/muelu/test/unit_tests/Aggregates.cpp b/packages/muelu/test/unit_tests/Aggregates.cpp index 48020116bbda..444c6c297b74 100644 --- a/packages/muelu/test/unit_tests/Aggregates.cpp +++ b/packages/muelu/test/unit_tests/Aggregates.cpp @@ -75,6 +75,7 @@ class AggregateGenerator { aggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(0)); aggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); aggFact->SetParameter("aggregation: allow user-specified singletons", Teuchos::ParameterEntry(true)); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 1", Teuchos::ParameterEntry(bPhase1)); aggFact->SetParameter("aggregation: enable phase 2a", Teuchos::ParameterEntry(bPhase2a)); @@ -155,6 +156,7 @@ class AggregateGenerator { aggFact->SetParameter("aggregation: min agg size", Teuchos::ParameterEntry(3)); aggFact->SetParameter("aggregation: max selected neighbors", Teuchos::ParameterEntry(0)); aggFact->SetParameter("aggregation: ordering", Teuchos::ParameterEntry(std::string("natural"))); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 1", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 2a", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: enable phase 2b", Teuchos::ParameterEntry(true)); diff --git a/packages/muelu/test/unit_tests/Hierarchy.cpp b/packages/muelu/test/unit_tests/Hierarchy.cpp index 8ccea6f393d0..3e4e2c55e1c7 100644 --- a/packages/muelu/test/unit_tests/Hierarchy.cpp +++ b/packages/muelu/test/unit_tests/Hierarchy.cpp @@ -234,6 +234,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Hierarchy, Iterate, Scalar, LocalOrdinal, Glob UncoupledAggFact->SetMinNodesPerAggregate(3); UncoupledAggFact->SetMaxNeighAlreadySelected(0); UncoupledAggFact->SetOrdering("natural"); + UncoupledAggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); RCP cdFact; RCP TentPFact = rcp(new TentativePFactory()); @@ -443,6 +444,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Hierarchy, IterateWithImplicitRestriction, Sca UncoupledAggFact->SetMinNodesPerAggregate(3); UncoupledAggFact->SetMaxNeighAlreadySelected(0); UncoupledAggFact->SetOrdering("natural"); + UncoupledAggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); RCP cdFact; RCP TentPFact = rcp(new TentativePFactory()); diff --git a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp index 2e4b8da1073f..e29fe020ea25 100644 --- a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp +++ b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp @@ -82,7 +82,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ParameterListInterpreter, BlockCrs, Scalar, Lo if (found == std::string::npos) continue; out << "Processing file: " << fileList[i] << std::endl; - ParameterListInterpreter mueluFactory("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + + Teuchos::RCP mueluList = rcp(new Teuchos::ParameterList()); + Teuchos::updateParametersFromXmlFileAndBroadcast("ParameterList/ParameterListInterpreter/" + fileList[i], mueluList.ptr(), *comm); + mueluList->set("use kokkos refactor", false); + + ParameterListInterpreter mueluFactory(*mueluList, comm); RCP H = mueluFactory.CreateHierarchy(); H->GetLevel(0)->Set("A", A); @@ -168,14 +173,18 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ParameterListInterpreter, PointCrs_vs_BlockCrs out << "Processing file: " << fileList[i] << std::endl; + Teuchos::RCP mueluList = rcp(new Teuchos::ParameterList()); + Teuchos::updateParametersFromXmlFileAndBroadcast("ParameterList/ParameterListInterpreter/" + fileList[i], mueluList.ptr(), *comm); + mueluList->set("use kokkos refactor", false); + // Point Hierarchy - ParameterListInterpreter mueluFactory1("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + ParameterListInterpreter mueluFactory1(*mueluList, comm); RCP PointH = mueluFactory1.CreateHierarchy(); PointH->GetLevel(0)->Set("A", PointA); mueluFactory1.SetupHierarchy(*PointH); // Block Hierachy - ParameterListInterpreter mueluFactory2("ParameterList/ParameterListInterpreter/" + fileList[i], *comm); + ParameterListInterpreter mueluFactory2(*mueluList, comm); RCP BlockH = mueluFactory2.CreateHierarchy(); BlockH->GetLevel(0)->Set("A", BlockA); mueluFactory2.SetupHierarchy(*BlockH); diff --git a/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp b/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp index 983a132cf14c..3b3768199dca 100644 --- a/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp +++ b/packages/muelu/test/unit_tests/UncoupledAggregationFactory.cpp @@ -108,6 +108,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(UncoupledAggregationFactory, Build_ML, Scalar, RCP aggFact = rcp(new UncoupledAggregationFactory()); + aggFact->SetParameter("aggregation: deterministic", Teuchos::ParameterEntry(true)); // Test the ML style options aggFact->SetParameter("aggregation: match ML phase2a", Teuchos::ParameterEntry(true)); aggFact->SetParameter("aggregation: match ML phase2b", Teuchos::ParameterEntry(true)); diff --git a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp index c856c755ec50..e4d206f2fdeb 100644 --- a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp @@ -13,12 +13,15 @@ #include "MueLu_TestHelpers_kokkos.hpp" #include "MueLu_Version.hpp" +#include #include "MueLu_CoalesceDropFactory.hpp" #include "MueLu_FilteredAFactory.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_AmalgamationFactory.hpp" #include "MueLu_LWGraph_kokkos.hpp" +#include + namespace MueLuTests { TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Constructor, Scalar, LocalOrdinal, GlobalOrdinal, Node) { @@ -33,6 +36,1215 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Constructor, Scala out << *coalesceDropFact << std::endl; } +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, Build, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); +} // Build + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 40); + +} // DistanceLaplacian + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (comm->getRank() == 0) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // DistanceLaplacianScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianUnscaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (!comm->getRank()) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // DistanceLaplacianUnscaleCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianCutSym, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + + // Now we doctor the coordinates so that the off-diagonal pair row 0 will want to keep (0,1) and row 1 will want to drop (1,0) + if (!comm->getRank()) { + auto vals = coordinates->getDataNonConst(0); + vals[0] = vals[0] - 2000 * 36; + } + + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.5)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut symmetric"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 106); + +} // DistanceLaplacianCutScaled + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // ClassicalScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalUnScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // ClassicalUnScaledCut + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicalCutSym, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] = 0.5; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut symmetric"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 106); + +} // ClassicalCutSym + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + // Change entry (1,0) + auto crsA = Teuchos::rcp_dynamic_cast(A, true)->getCrsMatrix(); + crsA->resumeFill(); + if (comm->getRank() == 0) { + Teuchos::Array cols(3); + Teuchos::Array vals(3); + size_t numEntries; + crsA->getGlobalRowCopy(1, cols, vals, numEntries); + vals[0] *= 2; + crsA->replaceGlobalValues(1, cols, vals); + } + crsA->fillComplete(); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // A_10 = -2 + // A_ij = -1 + // A_ii = 2 + // criterion for dropping is + // -Re(L_ij) <= tol * max_{k\neq i} Re(-L_ik) + // -> We drop entry (1,2). + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 105); + +} // SignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedScaledCutClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("scaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + TEST_THROW(coalesceDropFact.Build(fineLevel), MueLu::Exceptions::RuntimeError); + + // RCP graph = fineLevel.Get >("Graph", &coalesceDropFact); + // LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + // TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + // const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + // const RCP myDomainMap = graph->GetDomainMap(); + + // TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myImportMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myImportMap->getGlobalNumElements(),Teuchos::as(36 + (comm->getSize()-1)*2)); + + // TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myDomainMap->getGlobalNumElements(),36); + + // TEST_EQUALITY(graph->GetGlobalNumEdges(),36); + +} // SignedScaledCutClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedUnscaledCutClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical"))); + coalesceDropFact.SetParameter("aggregation: classical algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + TEST_THROW(coalesceDropFact.Build(fineLevel), MueLu::Exceptions::RuntimeError); + + // RCP graph = fineLevel.Get >("Graph", &coalesceDropFact); + // LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + // TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + // const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + // const RCP myDomainMap = graph->GetDomainMap(); + + // TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myImportMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myImportMap->getGlobalNumElements(),Teuchos::as(36 + (comm->getSize()-1)*2)); + + // TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + // TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + // TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0); + // TEST_EQUALITY(myDomainMap->getGlobalNumElements(),36); + + // TEST_EQUALITY(graph->GetGlobalNumEdges(),36); + +} // SignedUnScaledCutClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalColoredSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal colored signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalColoredSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalNoColoredSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + // this test is only compatible with rank higher than 1 + if (comm->getSize() == 1) { + return; + } + + // Default is Laplace1D with nx = 8748. + // It's a nice size for 1D and perfect aggregation. (6561 = 3^8) + // Nice size for 1D and perfect aggregation on small numbers of processors. (8748 = 4*3^7) + Teuchos::CommandLineProcessor clp(false); + Galeri::Xpetra::Parameters matrixParameters(clp, 8748); // manage parameters of the test case + Xpetra::Parameters xpetraParameters(clp); + + RCP map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + // getCrsGraph()->getImporter() + RCP importer = ImportFactory::Build(A->getRowMap(), map); + fineLevel.Set("Importer", importer); + auto importerTest = A->getCrsGraph()->getImporter(); // NULL + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal colored signed classical"))); + coalesceDropFact.SetParameter("aggregation: coloring: localize color graph", Teuchos::ParameterEntry(false)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + // Need an importer + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalNoColoredSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalSignedClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + + RCP A = TestHelpers_kokkos::TestFactory::Build1DPoisson(36); + fineLevel.Set("A", A); + + Teuchos::ParameterList galeriList; + galeriList.set("nx", Teuchos::as(36)); + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); + fineLevel.Set("Coordinates", coordinates); + + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + // We're dropping all the interior off-diagonal entries. + // dx = 1/36 + // L_ij = -36 + // L_ii = 72 + // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 0.51)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal signed classical"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph_d = fineLevel.Get>("Graph", &coalesceDropFact); + auto graph = graph_d->copyToHost(); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); + + const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! + const RCP myDomainMap = graph->GetDomainMap(); + + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); + TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 36); + +} // BlockDiagonalSignedClassical + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonal, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0 / 8.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal classical"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceDifferentCoordinatesLaplacian, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + GO bnx = 15 * comm->getSize(); + Teuchos::ParameterList bMatrixList; + matrixList.set("bnx", bnx); + RCP B = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(bMatrixList, lib); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", B->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacianWeighted, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildBlockMatrixAsPoint(matrixList, lib); + A->SetFixedBlockSize(1); // So we can block diagonalize + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP ibFact = rcp(new InitialBlockNumberFactory()); + Teuchos::ParameterList ibList; + ibList.set("aggregation: block diagonal: interleaved blocksize", 3); + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetFactory("BlockNumber", ibFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal distance laplacian"))); + coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); + std::vector weights_v{100.0, 1.0, 1.0, 1.0, 100, 1.0, 1.0, 1.0, 100.0}; + Teuchos::Array weights(weights_v); + coalesceDropFact.SetParameter("aggregation: distance laplacian directional weights", Teuchos::ParameterEntry(weights)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, DistanceLaplacianWeighted, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + typedef Xpetra::MultiVector RealValuedMultiVector; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + + Level fineLevel; + fineLevel.Set("A", A); + + RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("3D", A->getRowMap(), matrixList); + fineLevel.Set("Coordinates", coordinates); + + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.025)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); + std::vector weights_v{100.0, 1.0, 1.0}; + Teuchos::Array weights(weights_v); + coalesceDropFact.SetParameter("aggregation: distance laplacian directional weights", Teuchos::ParameterEntry(weights)); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, SignedClassicalSA, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + typedef Teuchos::ScalarTraits STS; + typedef typename STS::magnitudeType real_type; + + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + GO nx = 10 * comm->getSize(); + Teuchos::ParameterList matrixList; + matrixList.set("nx", nx); + matrixList.set("ny", (GO)10); + matrixList.set("nz", (GO)10); + matrixList.set("matrixType", "Laplace3D"); + RCP A = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + + Level fineLevel; + fineLevel.Set("A", A); + + RCP amalgFact = rcp(new AmalgamationFactory()); + CoalesceDropFactory_kokkos coalesceDropFact; + coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(0.0)); + coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("signed classical sa"))); + fineLevel.Request("Graph", &coalesceDropFact); + fineLevel.Request("DofsPerNode", &coalesceDropFact); + + coalesceDropFact.Build(fineLevel); + + RCP graph = fineLevel.Get>("Graph", &coalesceDropFact); + LO myDofsPerNode = fineLevel.Get("DofsPerNode", &coalesceDropFact); + TEST_EQUALITY(Teuchos::as(myDofsPerNode) == 1, true); +} + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include "MueLu_UseShortNames.hpp" MUELU_TESTING_SET_OSTREAM; @@ -363,7 +1575,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFi dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph_d = fineLevel.Get >("Graph", &dropFact); + auto graph = graph_d->copyToHost(); auto myDofsPerNode = fineLevel.Get ("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == 1, true); @@ -919,6 +2132,16 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, Local expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, 0.0, 2.0, reuseGraph)); expectedBoundaryNodesVector.push_back({true, true}); + + // test case 9 + Teuchos::ParameterList params9 = Teuchos::ParameterList(params0); + params9.set("aggregation: drop scheme", "classical"); + params9.set("aggregation: classical algo", "unscaled cut"); + params9.set("aggregation: drop tol", 1.0 / 3.6); + params.push_back(params9); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); } for (size_t testNo = 0; testNo < params.size(); ++testNo) { @@ -1007,12 +2230,31 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, Local } } -#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) \ +#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Build, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacian, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianScaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianUnscaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianCutSym, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalScaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalUnScaledCut, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicalCutSym, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedScaledCutClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedUnscaledCutClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalColoredSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalNoColoredSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalSignedClassical, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonal, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacian, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, BlockDiagonalDistanceLaplacianWeighted, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, DistanceLaplacianWeighted, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, SignedClassicalSA, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, 2x2, SC, LO, GO, NO) // TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithFiltering, SC, LO, GO, NO) // not implemented yet diff --git a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp index ff400f9ca3cb..9d8b51bcdfa8 100644 --- a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp +++ b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp @@ -674,7 +674,63 @@ class TestFactory { } #endif #endif -}; // class TestFactory + + // Create a matrix as specified by parameter list options + static RCP BuildBlockMatrixAsPoint(Teuchos::ParameterList& matrixList, Xpetra::UnderlyingLib lib) { + RCP > comm = TestHelpers_kokkos::Parameters::getDefaultComm(); + GO GO_INVALID = Teuchos::OrdinalTraits::invalid(); + RCP Op; + + if (lib == Xpetra::NotSpecified) + lib = TestHelpers_kokkos::Parameters::getLib(); + + // Make the base graph + RCP old_matrix = TestHelpers_kokkos::TestFactory::BuildMatrix(matrixList, lib); + RCP old_graph = old_matrix->getCrsGraph(); + RCP old_rowmap = old_graph->getRowMap(); + RCP old_colmap = old_graph->getColMap(); + int blocksize = 3; + + // Block Map + LO orig_num_rows = (LO)old_graph->getRowMap()->getLocalNumElements(); + Teuchos::Array owned_rows(blocksize * orig_num_rows); + for (LO i = 0; i < orig_num_rows; i++) { + GO old_gid = old_rowmap->getGlobalElement(i); + for (int j = 0; j < blocksize; j++) { + owned_rows[i * blocksize + j] = old_gid * blocksize + j; + } + } + RCP new_map = Xpetra::MapFactory::Build(lib, GO_INVALID, owned_rows(), 0, comm); + if (new_map.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: Map constructor failed"); + + // Block Graph / Matrix + RCP new_matrix = Xpetra::CrsMatrixFactory::Build(new_map, blocksize * old_graph->getLocalMaxNumRowEntries()); + if (new_matrix.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: Matrix constructor failed"); + for (LO i = 0; i < orig_num_rows; i++) { + Teuchos::ArrayView old_indices; + Teuchos::ArrayView old_values; + Teuchos::Array new_indices(1); + Teuchos::Array new_values(1); + old_matrix->getLocalRowView(i, old_indices, old_values); + for (int ii = 0; ii < blocksize; ii++) { + GO GRID = new_map->getGlobalElement(i * blocksize + ii); + for (LO j = 0; j < (LO)old_indices.size(); j++) { + for (int jj = 0; jj < blocksize; jj++) { + new_indices[0] = old_colmap->getGlobalElement(old_indices[j]) * blocksize + jj; + new_values[0] = old_values[j] * (SC)((ii == jj && i == old_indices[j]) ? blocksize * blocksize : 1); + new_matrix->insertGlobalValues(GRID, new_indices(), new_values); + } + } + } + } + new_matrix->fillComplete(); + Op = rcp(new CrsMatrixWrap(new_matrix)); + if (new_map.is_null()) throw std::runtime_error("BuildBlockMatrixAsPoint: CrsMatrixWrap constructor failed"); + Op->SetFixedBlockSize(blocksize); + + return Op; + } // BuildBlockMatrixAsPoint() +}; // class TestFactory // Helper class which has some Tpetra specific code inside // We put this into an extra helper class as we need partial specializations and diff --git a/packages/muelu/test/unit_tests_kokkos/Regression.cpp b/packages/muelu/test/unit_tests_kokkos/Regression.cpp index 46f4f88777ce..c1f41f7cade2 100644 --- a/packages/muelu/test/unit_tests_kokkos/Regression.cpp +++ b/packages/muelu/test/unit_tests_kokkos/Regression.cpp @@ -98,12 +98,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, H2D, Scalar, LocalOrdinal, GlobalO } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 19 : 31); + size_t targetNumDeepCopies = kkNativeDeepCopies + (std::is_same_v ? 17 : 32); TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 31); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), kkNativeDeepCopies + 32); } #endif // KOKKOS_HAS_SHARED_SPACE @@ -175,12 +175,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Regression, Aggregation, Scalar, LocalOrdinal, } #ifdef KOKKOS_HAS_SHARED_SPACE else { - size_t targetNumDeepCopies = std::is_same_v ? 17 : 16; + size_t targetNumDeepCopies = std::is_same_v ? 11 : 17; TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), targetNumDeepCopies); } #else else { - TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 16); + TEST_EQUALITY(Tpetra::Details::DeepCopyCounter::get_count_different_space(), 17); } #endif diff --git a/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp index b036a7f7fba2..bbcea92f0315 100644 --- a/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp +++ b/packages/panzer/disc-fe/src/Panzer_BasisValues2_impl.hpp @@ -1120,7 +1120,7 @@ getBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1174,7 +1174,7 @@ getBasisValues(const bool weighted, } else if(element_space == PureBasis::HGRAD || element_space == PureBasis::CONST) { fst::HGRADtransformVALUE(s_aux,s_ref); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1292,7 +1292,7 @@ getVectorBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1352,7 +1352,7 @@ getVectorBasisValues(const bool weighted, auto s_jac_det = Kokkos::subview(cubature_jacobian_determinant_.get_view(), cell_range, Kokkos::ALL()); fst::HDIVtransformVALUE(s_aux,s_jac, s_jac_det, s_ref); } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1456,7 +1456,7 @@ getGradBasisValues(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1506,7 +1506,7 @@ getGradBasisValues(const bool weighted, // Apply transformation using fst=Intrepid2::FunctionSpaceTools; fst::HGRADtransformGRAD(s_aux, s_jac_inv, s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1611,7 +1611,7 @@ getCurl2DVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1665,7 +1665,7 @@ getCurl2DVectorBasis(const bool weighted, // the divergence space in 2D! using fst=Intrepid2::FunctionSpaceTools; fst::HDIVtransformDIV(s_aux,s_jac_det,s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1767,7 +1767,7 @@ getCurlVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1817,7 +1817,7 @@ getCurlVectorBasis(const bool weighted, using fst=Intrepid2::FunctionSpaceTools; fst::HCURLtransformCURL(s_aux, s_jac, s_jac_det, s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); @@ -1917,7 +1917,7 @@ getDivVectorBasis(const bool weighted, // while create_mirror_view creates views in UVMSpace or // HIPSpace. These are not "assignable" in kokkos. We do an // inefficient copy if UVM or UNIFIED_MEMORY is enabled. -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) #ifdef KOKKOS_ENABLE_CUDA if constexpr (std::is_same::value) { #else @@ -1965,7 +1965,7 @@ getDivVectorBasis(const bool weighted, using fst=Intrepid2::FunctionSpaceTools; fst::HDIVtransformDIV(s_aux,s_jac_det,s_ref); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_IMPL_HIP_UNIFIED_MEMORY) } #endif PHX::Device().fence(); diff --git a/packages/piro/cmake/Dependencies.cmake b/packages/piro/cmake/Dependencies.cmake index ea637c4b8bec..7d5e7ef5584b 100644 --- a/packages/piro/cmake/Dependencies.cmake +++ b/packages/piro/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ SET(LIB_REQUIRED_DEP_PACKAGES Teuchos Stratimikos ThyraCore Tpetra Teko) -SET(LIB_OPTIONAL_DEP_PACKAGES NOX Tempus Stokhos +SET(LIB_OPTIONAL_DEP_PACKAGES NOX Tempus ROL Ifpack2 MueLu ThyraEpetraAdapters ThyraEpetraExtAdapters Epetra EpetraExt) SET(TEST_REQUIRED_DEP_PACKAGES ThyraTpetraAdapters MPI) SET(TEST_OPTIONAL_DEP_PACKAGES) diff --git a/packages/piro/cmake/Piro_config.hpp.in b/packages/piro/cmake/Piro_config.hpp.in index ac80335878e3..894f95980888 100644 --- a/packages/piro/cmake/Piro_config.hpp.in +++ b/packages/piro/cmake/Piro_config.hpp.in @@ -24,9 +24,7 @@ #cmakedefine HAVE_PIRO_TEMPUS /* DEPRECATED */ #cmakedefine Piro_ENABLE_Tempus -#cmakedefine HAVE_PIRO_STOKHOS /* DEPRECATED */ -#cmakedefine Piro_ENABLE_Stokhos #cmakedefine HAVE_PIRO_ROL /* DEPRECATED */ #cmakedefine Piro_ENABLE_ROL diff --git a/packages/piro/doc/index.doc b/packages/piro/doc/index.doc index 757f771f16ee..cbd522c5c185 100644 --- a/packages/piro/doc/index.doc +++ b/packages/piro/doc/index.doc @@ -59,8 +59,7 @@ by %Piro include:
  • NOX: Nonlinear Solver
  • LOCA: Continuation and Bifurcation Analysis Solver -
  • Stokhos: Embedded UQ solver for Stochastic-Galerkin over random variables -
  • LIME: Algorithms for multi-physics coupling (under development) +
  • ROL: Tools for Numerical Optimization
Each of these solvers not only takes a ModelEvaluator as @@ -121,7 +120,7 @@ and not just two. %Piro is developed by Andy Salinger, Roscoe Bartlett, Todd Coffey, Kim Liegeois, Roger Pawlowski, Mauro Perego, Eric Phipps and Irina Tezaur. Much of the code is adapted from -tests and examples for NOX, LOCA, Stokhos, and was developed and matured in the Albany +tests and examples for NOX, LOCA, and was developed and matured in the Albany application code before being library-ized into %Piro. */ diff --git a/packages/piro/src/CMakeLists.txt b/packages/piro/src/CMakeLists.txt index b515135120ff..b1b29af49e52 100644 --- a/packages/piro/src/CMakeLists.txt +++ b/packages/piro/src/CMakeLists.txt @@ -128,25 +128,6 @@ IF (Piro_ENABLE_Tempus) Piro_ObserverToTempusIntegrationObserverAdapter_Def.hpp) ENDIF() -# Optional StochasticGalerkin capability, depending upon Stokhos and NOX -IF (Piro_ENABLE_Stokhos AND Piro_ENABLE_NOX AND PIRO_HAVE_EPETRA_STACK) - APPEND_SET(HEADERS - Piro_Epetra_StokhosNOXObserver.hpp - Piro_Epetra_StokhosSolverFactory.hpp - Piro_Epetra_StokhosSolver.hpp - Piro_Epetra_StokhosMPSolver.hpp - Piro_Epetra_NECoupledModelEvaluator.hpp - ) - APPEND_SET(SOURCES - Piro_Epetra_StokhosNOXObserver.cpp - Piro_Epetra_StokhosSolverFactory.cpp - Piro_Epetra_StokhosSolver.cpp - Piro_Epetra_StokhosMPSolver.cpp - Piro_Epetra_NECoupledModelEvaluator.cpp - ) -ENDIF() - - IF (Piro_ENABLE_ROL) APPEND_SET(HEADERS Piro_ThyraProductME_Objective_SimOpt.hpp diff --git a/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp b/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp deleted file mode 100644 index 0422e22940cf..000000000000 --- a/packages/piro/src/Piro_Epetra_NECoupledModelEvaluator.cpp +++ /dev/null @@ -1,1194 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_NECoupledModelEvaluator.hpp" -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Epetra_StokhosSolver.hpp" - -#include "Epetra_LocalMap.h" - -#include "Teuchos_Assert.hpp" -#include "Teuchos_TimeMonitor.hpp" -#include "Teuchos_VerboseObjectParameterListHelpers.hpp" - -#include "Stokhos_Epetra.hpp" -#include "Stokhos_ReducedBasisFactory.hpp" -#include "EpetraExt_MultiComm.h" - -Piro::Epetra::NECoupledModelEvaluator:: -NECoupledModelEvaluator( - const Teuchos::Array >& models_, - const Teuchos::Array >& piroParams_, - const Teuchos::RCP& network_model_, - const Teuchos::RCP& params_, - const Teuchos::RCP& comm_, - const Teuchos::Array< Teuchos::RCP >& observers_): - models(models_), - piroParams(piroParams_), - network_model(network_model_), - params(params_), - comm(comm_), - observers(observers_) -{ - // Setup VerboseObject - Teuchos::readVerboseObjectSublist(params.get(), this); - - n_models = models.size(); - solvers.resize(n_models); - - // Create solvers for models A and B - bool stochastic = params->get("Stochastic", false); - if (observers.size() < n_models) - observers.resize(n_models); - if (stochastic) { - sgSolvers.resize(n_models); - for (int i=0; isetup(models[i], observers[i]); - solvers[i] = sgSolvers[i]; - } - } - else { - Piro::Epetra::SolverFactory solverFactory; - for (int i=0; iget< Teuchos::Array >("Network Coupling Parameter Indices"); - g_indices = - params->get< Teuchos::Array >("Network Coupling Response Indices"); - TEUCHOS_ASSERT(p_indices.size() == n_models); - TEUCHOS_ASSERT(g_indices.size() == n_models); - - // Get number of parameter and response vectors - solver_inargs.resize(n_models); - solver_outargs.resize(n_models); - num_params.resize(n_models); - num_responses.resize(n_models); - num_params_total = 0; - num_responses_total = 0; - for (int i=0; icreateInArgs(); - solver_outargs[i] = solvers[i]->createOutArgs(); - num_params[i] = solver_inargs[i].Np(); - num_responses[i] = solver_outargs[i].Ng(); - num_params_total += num_params[i]; - num_responses_total += num_responses[i]; - } - num_params_total -= n_models; - num_responses_total -= n_models; - - // Building indexing maps between coupled system parameters/responses and - // individual components - // Parameter vector i of this model evaluator corresponds to parameter - // param_map[i].second for model param_map[i].first. Similarly for the - // responses - for (int i=0; iget_p_map(p_indices[i]); - n_p[i] = p_maps[i]->NumGlobalElements(); - nx += n_p[i]; - } - x_map = Teuchos::rcp(new Epetra_Map(nx, 0, *comm)); - x_overlap_map = Teuchos::rcp(new Epetra_LocalMap(nx, 0, *comm)); - x_importer = Teuchos::rcp(new Epetra_Import(*x_overlap_map, *x_map)); - x_overlap = Teuchos::rcp(new Epetra_Vector(*x_overlap_map)); - - // Build f map, which is the product of the g_indices response maps - // For the time being, we will assume local maps, in the future we need to - // build proper product maps - g_maps.resize(n_models); - n_g.resize(n_models); - int nf = 0; - for (int i=0; iget_g_map(g_indices[i]); - n_g[i] = g_maps[i]->NumGlobalElements(); - nf += n_g[i]; - } - f_map = Teuchos::rcp(new Epetra_Map(nf, 0, *comm)); - f_overlap_map = Teuchos::rcp(new Epetra_LocalMap(nf, 0, *comm)); - f_exporter = Teuchos::rcp(new Epetra_Export(*f_overlap_map, *f_map)); - f_overlap = Teuchos::rcp(new Epetra_Vector(*f_overlap_map)); - - // Determine what we support - supports_W = true; - supports_x_sg = true; - supports_f_sg = true; - supports_W_sg = true; - Teuchos::Array ds(n_models); - for (int i=0; iMyGlobalElements(); - for (int i=0; iNumMyElements(); i++) { - int row = f_map->GID(i); - W_graph->InsertGlobalIndices(row, nx, indices); - } - W_graph->FillComplete(); - - W_overlap_graph = - Teuchos::rcp(new Epetra_CrsGraph(Copy, *f_overlap_map, nx)); - for (int i=0; iNumMyElements(); i++) { - int row = f_overlap_map->GID(i); - W_overlap_graph->InsertGlobalIndices(row, nx, indices); - } - W_overlap_graph->FillComplete(); - W_overlap = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_overlap_graph)); - } - - // Build initial guess - Epetra_Vector x_init_overlap(*x_overlap_map); - int offset = 0; - for (int i=0; i p_init = - solvers[i]->get_p_init(p_indices[i]); - for (int j=0; jExport(x_init_overlap, *x_importer, Insert); - - // Create storage for parameters, responses, and derivatives - p.resize(n_models); - g.resize(n_models); - dgdp_layout.resize(n_models); - dgdp.resize(n_models); - for (int i=0; isublist("Dimension Reduction"); - if (!dim_reduct_params.isParameter("Reduce Dimension")) - reduce_dimension.resize(n_models, 0); - else if (dim_reduct_params.isType("Reduce Dimension")) - reduce_dimension.resize(n_models, - dim_reduct_params.get("Reduce Dimension")); - else if (dim_reduct_params.isType< Teuchos::Array >("Reduce Dimension")) - reduce_dimension = - dim_reduct_params.get< Teuchos::Array >("Reduce Dimension"); - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, std::logic_error, - "Invalid type for parameter \"Dimension Reduction\""); -} - -// Overridden from EpetraExt::ModelEvaluator - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_x_map() const -{ - return x_map; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_f_map() const -{ - return f_map; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_x_init() const -{ - return x_init; -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_p_map(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_map(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_map(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_g_map(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_responses_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_g_map(): " << - "Invalid response index j = " << j << std::endl); - - return solvers[response_map[j].first]->get_g_map(response_map[j].second); -} - -Teuchos::RCP > -Piro::Epetra::NECoupledModelEvaluator:: -get_p_names(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_names(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_names(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -get_p_init(int j) const -{ - TEUCHOS_TEST_FOR_EXCEPTION( - j >= num_params_total || j < 0, Teuchos::Exceptions::InvalidParameter, - std::endl << - "Error in Piro::Epetra::NECoupledModelEvaluator::get_p_init(): " << - "Invalid parameter index j = " << j << std::endl); - - return solvers[param_map[j].first]->get_p_init(param_map[j].second); -} - -Teuchos::RCP -Piro::Epetra::NECoupledModelEvaluator:: -create_W() const -{ - Teuchos::RCP mat = - Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_graph)); - mat->FillComplete(); - return mat; -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::NECoupledModelEvaluator:: -createInArgs() const -{ - InArgsSetup inArgs; - inArgs.setModelEvalDescription(this->description()); - - // Deterministic InArgs - inArgs.setSupports(IN_ARG_x, true); - inArgs.set_Np(num_params_total); - - // Stochastic InArgs - if (supports_x_sg) { - inArgs.setSupports(IN_ARG_x_sg, supports_x_sg); - inArgs.setSupports(IN_ARG_sg_basis,true); - inArgs.setSupports(IN_ARG_sg_quadrature,true); - inArgs.setSupports(IN_ARG_sg_expansion,true); - for (int i=0; idescription()); - - // Deterministic OutArgs - outArgs.setSupports(OUT_ARG_f, true); - outArgs.setSupports(OUT_ARG_W, supports_W); - outArgs.set_W_properties( - DerivativeProperties(DERIV_LINEARITY_NONCONST, DERIV_RANK_FULL, true)); - outArgs.set_Np_Ng(num_params_total, num_responses_total); - for (int i=0; i out = this->getOStream(); - Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel(); - - // Create fresh in/out args for sub-models - for (int i=0; icreateInArgs(); - solver_outargs[i] = solvers[i]->createOutArgs(); - } - - EpetraExt::ModelEvaluator::InArgs network_inargs = inArgs; - EpetraExt::ModelEvaluator::OutArgs network_outargs = outArgs; - - // - // Deterministic calculation - // - Teuchos::RCP x = inArgs.get_x(); - if (x != Teuchos::null) { - - // p - x_overlap->Import(*x, *x_importer, Insert); - int offset = 0; - for (int i=0; i f = outArgs.get_f(); - if (f != Teuchos::null) { - for (int i=0; i W = outArgs.get_W(); - if (W != Teuchos::null) { - for (int i=0; i > basis = - inArgs.get_sg_basis(); - Teuchos::RCP multiComm = - x_sg->productComm(); - if (sg_overlap_map == Teuchos::null) - sg_overlap_map = - Teuchos::rcp(new Epetra_LocalMap(basis->size(), 0, - multiComm->TimeDomainComm())); - - if (x_sg_overlap == Teuchos::null) - x_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly( - basis, sg_overlap_map, x_overlap_map, multiComm)); - if (supports_f_sg && f_sg_overlap == Teuchos::null) - f_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly( - basis, sg_overlap_map, f_overlap_map, multiComm)); - if (supports_W_sg && W_sg_overlap == Teuchos::null) { - Teuchos::RCP domain_base_map = - x_overlap_map; - Teuchos::RCP range_base_map = - f_overlap_map; - W_sg_overlap = - Teuchos::rcp(new Stokhos::EpetraOperatorOrthogPoly( - basis, sg_overlap_map, domain_base_map, range_base_map, - multiComm)); - for (int block=0; blocksize(); block++) { - Teuchos::RCP W = - Teuchos::rcp(new Epetra_CrsMatrix(Copy, *W_overlap_graph)); - W_sg_overlap->setCoeffPtr(block,W); - } - } - - for (int i=0; isize(); block++) { - (*x_sg_overlap)[block].Import((*x_sg)[block], *x_importer, Insert); - int offset = 0; - for (int i=0; i solver_inargs_red(n_models); - Teuchos::Array solver_outargs_red(n_models); - Teuchos::Array > solvers_red(n_models); - - Teuchos::Array > piroParams_red(n_models); - - for (int i=0; ievalModel(solver_inargs_red[0], solver_outargs_red[0]); - } - - { - TEUCHOS_FUNC_TIME_MONITOR( - "NECoupledModelEvaluator -- Model 2 nonlinear elimination"); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Eliminating model " << 2 << " states..."; - solvers_red[1]->evalModel(solver_inargs_red[1], solver_outargs_red[1]); - } - } - else { - for (int i=0; ievalModel(solver_inargs_red[i], solver_outargs_red[i]); - } - } - - // Project back to original stochastic bases - for (int i=0; ievalModel(solver_inargs, solver_outargs, - network_inargs, network_outargs, - n_p, n_g, p, g, dgdp, dgdp_layout, - p_sg, g_sg, dgdp_sg, dgdp_sg_layout); - - // Export network residuals, Jacobians, etc... - - // f - Teuchos::RCP f = outArgs.get_f(); - if (f != Teuchos::null) - f->Export(*f_overlap, *f_exporter, Insert); - - // W - Teuchos::RCP W = outArgs.get_W(); - if (W != Teuchos::null) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast(W, true); - W_crs->Export(*W_overlap, *f_exporter, Insert); - } - - // f_sg - if (supports_f_sg) { - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null) { - for (int block=0; blocksize(); block++) - (*f_sg)[block].Export((*f_sg_overlap)[block], *f_exporter, Insert); - } - } - - // W_sg - if (supports_W_sg) { - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null) { - for (int block=0; blocksize(); block++) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast( - W_sg->getCoeffPtr(block), true); - Teuchos::RCP W_overlap_crs = - Teuchos::rcp_dynamic_cast( - W_sg_overlap->getCoeffPtr(block), true); - W_crs->Export(*W_overlap_crs, *f_exporter, Insert); - } - } - } - } - - -void -Piro::Epetra::NECoupledModelEvaluator:: -do_dimension_reduction( - int model_index, - const InArgs& inArgs, - const InArgs& solver_inargs, - const OutArgs& solver_outargs, - const Teuchos::RCP& model, - const Teuchos::RCP& solver, - const Teuchos::RCP& solver_params, - InArgs& reduced_inargs, - OutArgs& reduced_outargs, - Teuchos::RCP& reduced_solver, - Teuchos::RCP& reduced_params) const -{ - TEUCHOS_FUNC_TIME_MONITOR("NECoupledModelEvaluator -- dimension reduction"); - - // First copy the in/out args to set everything we don't modify - reduced_inargs = solver_inargs; - reduced_outargs = solver_outargs; - reduced_solver = solver; - reduced_params = params; - - // Make sure there is something to do - InArgs::sg_const_vector_t x_sg; - if (supports_x_sg) - x_sg = inArgs.get_x_sg(); - if (!reduce_dimension[model_index] || x_sg == Teuchos::null) - return; - - Teuchos::RCP > basis = - Teuchos::rcp_dynamic_cast >( - inArgs.get_sg_basis(), true); - Teuchos::RCP > quad = - inArgs.get_sg_quadrature(); - Teuchos::RCP > expansion - = inArgs.get_sg_expansion(); - - // Copy Epetra PCEs into Stokhos PCE objects - int total_num_p = 0; - for (int i=0; icoefficientMap()->NumMyElements(); - } - } - int sz = basis->size(); - Teuchos::Array< Stokhos::OrthogPolyApprox > p_opa(total_num_p); - int index = 0; - for (int i=0; icoefficientMap()->NumMyElements(); k++) - p_opa[index+k].reset(basis); - for (int j=0; jcoefficientMap()->NumMyElements(); - } - } - - // Build Stieltjes basis, quadrature, and new PCEs - Teuchos::RCP > red_basis; - Teuchos::RCP > red_quad; - Teuchos::Array > red_pces; - Teuchos::ParameterList& reduct_params = - params->sublist("Dimension Reduction"); - int order = basis->order(); - int new_order = reduct_params.get("Reduced Order", -1); - if (new_order == -1) - new_order = order; - if (st_quad == Teuchos::null) { - st_quad = quad; - // st_quad = - // Teuchos::rcp(new Stokhos::SparseGridQuadrature( - // basis, new_order+1)); - // st_quad = - // Teuchos::rcp(new Stokhos::TensorProductQuadrature( - // basis, 4*new_order+1)); - // std::cout << "st_quad->size() = " << st_quad->size() << std::endl; - } - Teuchos::RCP > Cijk = - expansion->getTripleProduct(); - Stokhos::ReducedBasisFactory factory(reduct_params); - Teuchos::RCP< Stokhos::ReducedPCEBasis > gs_basis = - factory.createReducedBasis(new_order, p_opa, st_quad, Cijk); - red_basis = gs_basis; - red_quad = gs_basis->getReducedQuadrature(); - red_pces.resize(p_opa.size()); - for (int i=0; itransformFromOriginalBasis(p_opa[i].coeff(), red_pces[i].coeff()); - } - - Teuchos::RCP multiComm = x_sg->productComm(); - - // Copy into Epetra objects - int red_sz = red_basis->size(); - Teuchos::RCP red_overlap_map = - Teuchos::rcp(new Epetra_LocalMap(red_sz, 0, - multiComm->TimeDomainComm())); - - // p_red - index = 0; - for (int i=0; iget_p_map(i), - multiComm)); - for (int j=0; jcoefficientMap()->NumMyElements(); - reduced_inargs.set_p_sg(i, p_red); - } - } - - for (int i=0; iget_g_map(i), - multiComm)); - reduced_outargs.set_g_sg(i, g_red); - } - - // dg/dx_red - if (!solver_outargs.supports(OUT_ARG_DgDx_sg, i).none()) { - Teuchos::RCP dgdx_sg = - solver_outargs.get_DgDx_sg(i).getMultiVector(); - if (dgdx_sg != Teuchos::null) { - Teuchos::RCP dgdx_red = - Teuchos::rcp(new Stokhos::EpetraMultiVectorOrthogPoly( - red_basis, red_overlap_map, - dgdx_sg->coefficientMap(), - multiComm, - dgdx_sg->numVectors())); - reduced_outargs.set_DgDx_sg( - i, SGDerivative(dgdx_red, - solver_outargs.get_DgDx_sg(i).getMultiVectorOrientation())); - } - } - - // dg/dp_red - for (int j=0; j dgdp_sg = - solver_outargs.get_DgDp_sg(i,j).getMultiVector(); - if (dgdp_sg != Teuchos::null) { - Teuchos::RCP dgdp_red = - Teuchos::rcp(new Stokhos::EpetraMultiVectorOrthogPoly( - red_basis, red_overlap_map, - dgdp_sg->coefficientMap(), - multiComm, - dgdp_sg->numVectors())); - reduced_outargs.set_DgDp_sg( - i, j, SGDerivative(dgdp_red, - solver_outargs.get_DgDp_sg(i,j).getMultiVectorOrientation())); - } - } - } - } - - - // Setup new solver - reduced_params = - Teuchos::rcp(new Teuchos::ParameterList(*solver_params)); - Teuchos::ParameterList& red_sg_params = - reduced_params->sublist("Stochastic Galerkin"); - red_sg_params.sublist("Basis").set("Stochastic Galerkin Basis", - red_basis); - red_sg_params.sublist("Quadrature").set("Stochastic Galerkin Quadrature", - red_quad); - if (red_sg_params.sublist("Expansion").isParameter("Stochastic Galerkin Expansion")) - red_sg_params.sublist("Expansion").remove("Stochastic Galerkin Expansion"); - if (red_sg_params.isParameter("Triple Product Tensor")) - red_sg_params.remove("Triple Product Tensor"); - Teuchos::RCP reduced_piro_solver = - Teuchos::rcp(new Piro::Epetra::StokhosSolver(reduced_params, comm)); - reduced_piro_solver->setup(model, observers[model_index]); - reduced_solver = reduced_piro_solver; - - if (reduced_inargs.supports(IN_ARG_sg_basis)) - reduced_inargs.set_sg_basis(red_basis); - if (reduced_inargs.supports(IN_ARG_sg_quadrature)) - reduced_inargs.set_sg_quadrature(red_quad); - if (reduced_inargs.supports(IN_ARG_sg_expansion)) - reduced_inargs.set_sg_expansion(red_sg_params.sublist("Expansion").get< Teuchos::RCP< Stokhos::OrthogPolyExpansion > >("Stochastic Galerkin Expansion")); -} - -void -Piro::Epetra::NECoupledModelEvaluator:: -do_dimension_projection( - int model_index, - const InArgs& inArgs, - const InArgs& reduced_inargs, - const OutArgs& reduced_outargs, - OutArgs& solver_outargs) const -{ - TEUCHOS_FUNC_TIME_MONITOR("NECoupledModelEvaluator -- dimension projection"); - - // Make sure there is something to do - InArgs::sg_const_vector_t x_sg; - if (supports_x_sg) - x_sg = inArgs.get_x_sg(); - if (!reduce_dimension[model_index] || x_sg == Teuchos::null) - return; - - Teuchos::RCP > basis = - inArgs.get_sg_basis(); - Teuchos::RCP > quad = - inArgs.get_sg_quadrature(); - Teuchos::RCP > red_basis = - Teuchos::rcp_dynamic_cast >(reduced_inargs.get_sg_basis()); - - for (int i=0; itransformToOriginalBasis( - (*g_red)[0].Values(), - (*g_sg)[0].Values(), - g_red->coefficientMap()->NumMyElements(), - true); - } - } - - // dg/dx_sg - if (!solver_outargs.supports(OUT_ARG_DgDx_sg, i).none()) { - Teuchos::RCP dgdx_sg = - solver_outargs.get_DgDx_sg(i).getMultiVector(); - if (dgdx_sg != Teuchos::null) { - Teuchos::RCP dgdx_red = - reduced_outargs.get_DgDx_sg(i).getMultiVector(); - - // transformToOriginalBasis() needs the entries for each pce - // coefficient stored contiguously. This isn't the case for the - // full multivector (each column along with all of its pce - // coefficients is stored in one contiguous chunk). Thus we need - // to transform each column individually - int ncol = dgdx_red->numVectors(); - for (int col=0; coltransformToOriginalBasis( - (*dgdx_red)[0](col)->Values(), - (*dgdx_sg)[0](col)->Values(), - dgdx_red->coefficientMap()->NumMyElements(), - true); - } - } - - // dg/dp_sg - for (int j=0; j dgdp_sg = - solver_outargs.get_DgDp_sg(i,j).getMultiVector(); - if (dgdp_sg != Teuchos::null) { - Teuchos::RCP dgdp_red = - reduced_outargs.get_DgDp_sg(i,j).getMultiVector(); - - // transformToOriginalBasis() needs the entries for each pce - // coefficient stored contiguously. This isn't the case for the - // full multivector (each column along with all of its pce - // coefficients is stored in one contiguous chunk). Thus we need - // to transform each column individually - int ncol = dgdp_red->numVectors(); - for (int col=0; coltransformToOriginalBasis( - (*dgdp_red)[0](col)->Values(), - (*dgdp_sg)[0](col)->Values(), - dgdp_red->coefficientMap()->NumMyElements(), - true); - } - } - } - } -} - - -void -Piro::Epetra::ParamToResponseNetworkModel:: -evalModel( - const Teuchos::Array& model_inargs, - const Teuchos::Array& model_outargs, - const EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const -{ - - // f - Teuchos::RCP f = network_outargs.get_f(); - if (f != Teuchos::null) { - f->PutScalar(0.0); - for (int i=0; i W = network_outargs.get_W(); - if (W != Teuchos::null) { - Teuchos::RCP W_crs = - Teuchos::rcp_dynamic_cast(W, true); - W_crs->PutScalar(0.0); - int row, col; - double val; - for (int i=0; iReplaceGlobalValues(row, 1, &val, &col); - - // dg_2/dp_2 part - for (int j=0; jReplaceGlobalValues(row, 1, &val, &col); - } - } - for (int i=0; iReplaceGlobalValues(row, 1, &val, &col); - - // dg_1/dp_1 part - for (int j=0; jReplaceGlobalValues(row, 1, &val, &col); - } - } - } - - // f_sg - if (network_outargs.supports(EpetraExt::ModelEvaluator::OUT_ARG_f_sg)) { - EpetraExt::ModelEvaluator::OutArgs::sg_vector_t f_sg = - network_outargs.get_f_sg(); - if (f_sg != Teuchos::null) { - // std::cout << "g_sg[0] = " << *g_sg[0] << std::endl; - // std::cout << "g_sg[1] = " << *g_sg[1] << std::endl; - f_sg->init(0.0); - for (int block=0; blocksize(); block++) { - for (int i=0; i& model_inargs, - const Teuchos::Array& model_outargs, - const EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const = 0; - - }; - - class ParamToResponseNetworkModel : - public AbstractNetworkModel { - - public: - - //! Constructor - ParamToResponseNetworkModel() {} - - //! Destructor - virtual ~ParamToResponseNetworkModel() {} - - //! evaluate model - virtual void evalModel( - const Teuchos::Array& model_inargs, - const Teuchos::Array& model_outargs, - const EpetraExt::ModelEvaluator::InArgs& network_inargs, - const EpetraExt::ModelEvaluator::OutArgs& network_outargs, - const Teuchos::Array& n_p, - const Teuchos::Array& n_g, - const Teuchos::Array< Teuchos::RCP >& p, - const Teuchos::Array< Teuchos::RCP >& g, - const Teuchos::Array< Teuchos::RCP >& dgdp, - const Teuchos::Array& dgdp_layout, - const Teuchos::Array& p_sg, - const Teuchos::Array& g_sg, - const Teuchos::Array >& dgdp_sg, - const Teuchos::Array& dgdp_sg_layout) const; - - }; - - class NECoupledModelEvaluator : - public EpetraExt::ModelEvaluator, - public Teuchos::VerboseObject { - public: - - /** \brief . */ - NECoupledModelEvaluator( - const Teuchos::Array >& models, - const Teuchos::Array >& piroParams, - const Teuchos::RCP& network_model, - const Teuchos::RCP& params, - const Teuchos::RCP& comm, - const Teuchos::Array< Teuchos::RCP >& observers = - Teuchos::Array >()); - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . */ - Teuchos::RCP get_x_init() const; - /** \brief . */ - Teuchos::RCP get_p_map(int l) const; - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - //! Return array of parameter names - Teuchos::RCP > get_p_names(int l) const; - /** \brief . */ - Teuchos::RCP get_p_init(int l) const; - /** \brief . */ - Teuchos::RCP create_W() const; - /** \brief . */ - InArgs createInArgs() const; - /** \brief . */ - OutArgs createOutArgs() const; - /** \brief . */ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - protected: - - void do_dimension_reduction( - int model_index, - const InArgs& inArgs, - const InArgs& solver_inargs, - const OutArgs& solver_outargs, - const Teuchos::RCP& model, - const Teuchos::RCP& solver, - const Teuchos::RCP& solver_params, - InArgs& reduced_inargs, - OutArgs& reduced_outargs, - Teuchos::RCP& reduced_solver, - Teuchos::RCP& reduced_params) const; - - void do_dimension_projection( - int model_index, - const InArgs& inArgs, - const InArgs& reduced_inargs, - const OutArgs& reduced_outargs, - OutArgs& solver_outargs) const; - - private: - - // ///////////////////////////////////// - // Private member data - - typedef Stokhos::StandardStorage StorageType; - - Teuchos::Array > models; - Teuchos::Array< Teuchos::RCP > piroParams; - Teuchos::RCP network_model; - Teuchos::RCP params; - Teuchos::RCP comm; - Teuchos::Array< Teuchos::RCP > observers; - - Teuchos::Array< Teuchos::RCP > solvers; - Teuchos::Array< Teuchos::RCP > sgSolvers; - int n_models; - Teuchos::Array p_indices; - Teuchos::Array g_indices; - Teuchos::Array n_p; - Teuchos::Array n_g; - Teuchos::Array num_params; - Teuchos::Array num_responses; - int num_params_total; - int num_responses_total; - bool supports_W; - Teuchos::Array< std::pair > param_map; - Teuchos::Array< std::pair > response_map; - - mutable Teuchos::Array solver_inargs; - mutable Teuchos::Array solver_outargs; - - Teuchos::Array< Teuchos::RCP > p_maps; - Teuchos::Array< Teuchos::RCP > g_maps; - - Teuchos::RCP x_map; - Teuchos::RCP f_map; - Teuchos::RCP x_overlap_map; - Teuchos::RCP f_overlap_map; - Teuchos::RCP x_importer; - Teuchos::RCP f_exporter; - Teuchos::RCP x_overlap; - Teuchos::RCP f_overlap; - Teuchos::RCP W_graph; - Teuchos::RCP W_overlap_graph; - Teuchos::RCP W_overlap; - Teuchos::RCP x_init; - - Teuchos::Array< Teuchos::RCP > p; - Teuchos::Array< Teuchos::RCP > g; - Teuchos::Array< EDerivativeMultiVectorOrientation > dgdp_layout; - Teuchos::Array< Teuchos::RCP > dgdp; - - // Stochastic Galerkin data - bool supports_x_sg; - bool supports_f_sg; - bool supports_W_sg; - mutable Teuchos::RCP sg_overlap_map; - mutable OutArgs::sg_vector_t x_sg_overlap; - mutable OutArgs::sg_vector_t f_sg_overlap; - mutable OutArgs::sg_operator_t W_sg_overlap; - mutable Teuchos::Array p_sg; - mutable Teuchos::Array g_sg; - mutable Teuchos::Array dgdp_sg_layout; - mutable Teuchos::Array > dgdp_sg; - - Teuchos::Array reduce_dimension; - mutable Teuchos::RCP > st_quad; - }; - - } - -} - -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp b/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp deleted file mode 100644 index 930b9b41aac2..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosMPSolver.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosMPSolver.hpp" - -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Provider.hpp" - -#include "Stokhos_Epetra.hpp" -#include "NOX_Epetra_ModelEvaluatorInterface.H" -#include "NOX_Epetra_LinearSystem_Stratimikos.H" -#include "NOX_Epetra_LinearSystem_MPBD.hpp" - -Piro::Epetra::StokhosMPSolver:: -StokhosMPSolver(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& mpParams_, - const Teuchos::RCP& globalComm, - int block_size, int num_spatial_procs) : - piroParams(piroParams_), - mpParams(mpParams_), - num_mp(block_size) -{ - product_comm = - Stokhos::buildMultiComm(*globalComm, block_size, num_spatial_procs); -} - -Piro::Epetra::StokhosMPSolver::~StokhosMPSolver() -{ -} - -void -Piro::Epetra::StokhosMPSolver:: -setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver) -{ - Teuchos::RCP mp_comm = - Stokhos::getStochasticComm(product_comm); - Teuchos::RCP mp_block_map = - Teuchos::rcp(new Epetra_Map(num_mp, 0, *mp_comm)); - mp_model = model; - - // Turn mp_model into an MP-nonlinear problem - mp_nonlin_model = - Teuchos::rcp(new Stokhos::MPModelEvaluator(mp_model, product_comm, - mp_block_map, mpParams)); - - Piro::Epetra::SolverFactory solverFactory; - if (piroParams->get("Solver Type") == "NOX") - { - bool use_mpbd_solver = mpParams->get("Use MPBD Solver", false); - Teuchos::RCP linsys; - Teuchos::RCP nox_interface; - if (use_mpbd_solver) { - nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(mp_nonlin_model)); - Teuchos::RCP A = - mp_nonlin_model->create_W(); - Teuchos::RCP M = - mp_nonlin_model->create_WPrec()->PrecOp; - Teuchos::RCP iReq = - nox_interface; - Teuchos::RCP iJac = - nox_interface; - Teuchos::RCP iPrec = - nox_interface; - - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& noxstratlsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - Teuchos::ParameterList& mpbdParams = - mpParams->sublist("MPBD Linear Solver"); - mpbdParams.sublist("Deterministic Solver Parameters") = - noxstratlsParams; - Teuchos::RCP inner_A = model->create_W(); - Teuchos::RCP inner_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - Teuchos::RCP inner_iReq = - inner_nox_interface; - Teuchos::RCP inner_iJac = - inner_nox_interface; - Teuchos::RCP inner_u = model->get_x_init(); - Teuchos::RCP inner_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - printParams, - noxstratlsParams, - inner_iJac, inner_A, *inner_u)); - linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemMPBD(printParams, - mpbdParams, - inner_linsys, - iReq, iJac, A, - model->get_x_map())); - } - - solverFactory.setSource(nox_interface); - solverFactory.setSource(linsys); - } - // Create solver to map p -> g - mp_solver = solverFactory.createSolver(piroParams, mp_nonlin_model); - - // Create MP inverse model evaluator to map p_mp -> g_mp - Teuchos::Array mp_p_index_map = - mp_nonlin_model->get_p_mp_map_indices(); - Teuchos::Array mp_g_index_map = - mp_nonlin_model->get_g_mp_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - mp_nonlin_model->get_g_mp_base_maps(); - mp_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - mp_inverse_solver = - Teuchos::rcp(new Stokhos::MPInverseModelEvaluator(mp_solver, - mp_p_index_map, - mp_g_index_map, - base_g_maps)); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver:: -getSpatialComm() const -{ - return Stokhos::getSpatialComm(product_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver:: -getStochasticComm() const -{ - return Stokhos::getStochasticComm(product_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver:: -getGlobalMultiComm() const -{ - return product_comm; -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_x_map() const -{ - return mp_inverse_solver->get_x_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_f_map() const -{ - return mp_inverse_solver->get_f_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_p_map(int l) const -{ - return mp_inverse_solver->get_p_map(l); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_g_map(int j) const -{ - return mp_inverse_solver->get_g_map(j); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_x_init() const -{ - return mp_inverse_solver->get_x_init(); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::get_p_init(int l) const -{ - return mp_nonlin_model->get_p_init(l); -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::StokhosMPSolver::createInArgs() const -{ - return mp_inverse_solver->createInArgs(); -} - -EpetraExt::ModelEvaluator::OutArgs -Piro::Epetra::StokhosMPSolver::createOutArgs() const -{ - return mp_inverse_solver->createOutArgs(); -} - -void -Piro::Epetra::StokhosMPSolver::evalModel(const InArgs& inArgs, - const OutArgs& outArgs ) const -{ - mp_inverse_solver->evalModel(inArgs, outArgs); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::create_g_mp(int l, Epetra_DataAccess CV, - const Epetra_Vector* v) const -{ - OutArgs outargs = mp_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - //if (piroParams->get("Solver Type") == "NOX" && l == ng) { - if (l == ng) { - return mp_nonlin_model->create_x_mp(CV, v); - } - else - return mp_nonlin_model->create_g_mp(l, CV, v); -} - -Teuchos::RCP -Piro::Epetra::StokhosMPSolver::create_g_mv_mp(int l, int num_vecs, - Epetra_DataAccess CV, - const Epetra_MultiVector* v) const -{ - OutArgs outargs = mp_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - //if (piroParams->get("Solver Type") == "NOX" && l == ng) { - if (l == ng) { - return mp_nonlin_model->create_x_mv_mp(num_vecs, CV, v); - } - else - return mp_nonlin_model->create_g_mv_mp(l, num_vecs, CV, v); -} - diff --git a/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp b/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp deleted file mode 100644 index 8634b5f687b3..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosMPSolver.hpp +++ /dev/null @@ -1,168 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_MP_STOKHOS_SOLVER_H -#define PIRO_EPETRA_MP_STOKHOS_SOLVER_H - -#include "EpetraExt_ModelEvaluator.h" -#include "EpetraExt_MultiComm.h" - -#include "Stokhos_MPModelEvaluator.hpp" -#include "Stokhos_MPInverseModelEvaluator.hpp" - -#include "NOX_Epetra_Observer.H" - -namespace Piro { -namespace Epetra { - - /*! - * \brief An epetra model evaluator adapter for setting up a multi-point - * solver. - */ - class StokhosMPSolver : public EpetraExt::ModelEvaluator { - public: - - /** \name Constructors/initializers */ - //@{ - - //! Constructor - StokhosMPSolver(const Teuchos::RCP& piroParams, - const Teuchos::RCP& mpParams, - const Teuchos::RCP& globalComm, - int block_size, int num_spatial_procs); - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Setup rest of model evaluator - void setup( - const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver = Teuchos::null); - - - //@} - - ~StokhosMPSolver(); - - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_p_map(int l) const; - - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - - /** \brief . */ - Teuchos::RCP get_p_init(int l) const; - - /** \brief . */ - // Teuchos::RCP create_W() const; - /** \brief . */ - EpetraExt::ModelEvaluator::InArgs createInArgs() const; - /** \brief . */ - EpetraExt::ModelEvaluator::OutArgs createOutArgs() const; - /** \brief . */ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - /** \name Accessors */ - //@{ - - Teuchos::RCP - get_mp_model() const { return mp_nonlin_model; } - - //! Set initial solution polynomial - void set_x_mp_init(const Stokhos::ProductEpetraVector& x_mp_in) { - mp_nonlin_model->set_x_mp_init(x_mp_in); - } - - //! Return initial MP x - Teuchos::RCP - get_x_mp_init() const { - return mp_nonlin_model->get_x_mp_init(); - } - - //! Set initial parameter polynomial - void set_p_mp_init(int i, const Stokhos::ProductEpetraVector& p_mp_in) { - mp_nonlin_model->set_p_mp_init(i, p_mp_in); - } - - //! Get initial parameter polynomial - Teuchos::RCP - get_p_mp_init(int l) const { - return mp_nonlin_model->get_p_mp_init(l); - } - - //! Create vector orthog poly using x map and owned mp map - Teuchos::RCP - create_x_mp(Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return mp_nonlin_model->create_x_mp(CV, v); - } - - //! Create vector orthog poly using p map - Teuchos::RCP - create_p_mp(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return mp_nonlin_model->create_p_mp(l, CV, v); - } - - //! Create multi-point vector using p map - Teuchos::RCP - create_p_mv_mp(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const { - return mp_nonlin_model->create_p_mv_mp(l, num_vecs, CV, v); - } - - //! Create vector orthog poly using g map - Teuchos::RCP - create_g_mp(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const; - - //! Create multi-vector orthog poly using g map - Teuchos::RCP - create_g_mv_mp(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const; - - //@} - - private: - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . */ - Teuchos::RCP get_x_init() const; - - private: - - Teuchos::RCP piroParams; - Teuchos::RCP mpParams; - Teuchos::RCP product_comm; - Teuchos::RCP mp_model; - Teuchos::RCP mp_nonlin_model; - Teuchos::RCP mp_solver; - Teuchos::RCP mp_inverse_solver; - int num_mp; - - }; - -} -} -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp b/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp deleted file mode 100644 index d92fc5d92571..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosNOXObserver.hpp" -#include "Stokhos_EpetraVectorOrthogPoly.hpp" - -Piro::Epetra::StokhosNOXObserver::StokhosNOXObserver ( - const Teuchos::RCP& noxObserver_, - const Teuchos::RCP >& basis_, const Teuchos::RCP& stoch_map_, - const Teuchos::RCP& spatial_map_, - const Teuchos::RCP& product_map_, - const Teuchos::RCP& product_comm_, - const Teuchos::RCP& importer_, - int save_moments_) : - noxObserver(noxObserver_), - basis(basis_), - stoch_map(stoch_map_), - spatial_map(spatial_map_), - product_map(product_map_), - product_comm(product_comm_), - importer(importer_), - numSGBlocks(basis->size()), - save_moments(save_moments_) -{ - //if (noxObserver == Teuchos::null) cout << "XXX1" << endl; - if (save_moments > 0) - moment = Teuchos::rcp(new Epetra_Vector(*spatial_map)); - if (product_map != Teuchos::null) - overlap_vec = Teuchos::rcp(new Epetra_Vector(*product_map)); -} - -void Piro::Epetra::StokhosNOXObserver::observeSolution( - const Epetra_Vector& solution) -{ - - if (noxObserver == Teuchos::null) - return; - - // Copy into block vector, so Block access is available - overlap_vec->Import(solution, *importer, Insert); - Stokhos::EpetraVectorOrthogPoly vec_poly( - basis, stoch_map, spatial_map, product_map, product_comm, View, - *overlap_vec); - if (save_moments <= 0) { - for (int i=0; i< numSGBlocks; i++) { - noxObserver->observeSolution(vec_poly[i], i); - } - } - else { - // Always write out first moment - vec_poly.computeMean(*moment); - noxObserver->observeSolution(*moment, 1); - if (save_moments >= 2) { - vec_poly.computeStandardDeviation(*moment); - noxObserver->observeSolution(*moment, 2); - } - } - - -} diff --git a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp b/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp deleted file mode 100644 index 440197ed77d5..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosNOXObserver.hpp +++ /dev/null @@ -1,57 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOSNOXOBSERVER -#define PIRO_EPETRA_STOKHOSNOXOBSERVER - -#include "NOX_Epetra_Observer.H" -#include "EpetraExt_BlockVector.h" -#include "Epetra_Map.h" -#include "Teuchos_RCP.hpp" -#include "Stokhos_OrthogPolyBasis.hpp" -#include "EpetraExt_MultiComm.h" -#include "Epetra_Import.h" - -namespace Piro { -namespace Epetra { - -class StokhosNOXObserver : public NOX::Epetra::Observer -{ -public: - StokhosNOXObserver ( - const Teuchos::RCP& noxObserver_, - const Teuchos::RCP >& basis_, - const Teuchos::RCP& stoch_map_, - const Teuchos::RCP& spatial_map_, - const Teuchos::RCP& product_map_, - const Teuchos::RCP& product_comm_, - const Teuchos::RCP& importer_, - int save_moments_ = -1); - - void observeSolution(const Epetra_Vector& soln); - -private: - - Teuchos::RCP noxObserver; - Teuchos::RCP > basis; - Teuchos::RCP stoch_map; - Teuchos::RCP spatial_map; - Teuchos::RCP product_map; - Teuchos::RCP product_comm; - Teuchos::RCP importer; - const int numSGBlocks; - int save_moments; - Teuchos::RCP moment; - Teuchos::RCP overlap_vec; -}; - -} -} - -#endif //PIRO_EPETRA_STOKHOSNOXOBSERVER diff --git a/packages/piro/src/Piro_Epetra_StokhosSolver.cpp b/packages/piro/src/Piro_Epetra_StokhosSolver.cpp deleted file mode 100644 index 172bb9d46b3b..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolver.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosSolver.hpp" -#include "Stokhos_Epetra.hpp" - -Piro::Epetra::StokhosSolver:: -StokhosSolver(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& globalComm) : - piroParams(piroParams_), - sg_solver_factory(piroParams_, globalComm) -{ -} - -Piro::Epetra::StokhosSolver::~StokhosSolver() -{ -} - -void -Piro::Epetra::StokhosSolver:: -setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver) -{ - sg_nonlin_model = sg_solver_factory.createSGModel(model); - const Teuchos::RCP sg_observer = - sg_solver_factory.createSGObserver(noxObserver); - const Teuchos::RCP sg_block_solver = - sg_solver_factory.createSGSolver(sg_nonlin_model, sg_observer); - sg_solver = sg_solver_factory.createSGSolverAdapter(sg_block_solver); -} - -void -Piro::Epetra::StokhosSolver:: -resetSolverParameters(const Teuchos::ParameterList& new_solver_params) -{ - sg_solver_factory.resetSolverParameters(new_solver_params); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getSpatialComm() const -{ - return sg_solver_factory.getSpatialComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getStochasticComm() const -{ - return sg_solver_factory.getStochasticComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver:: -getGlobalMultiComm() const -{ - return sg_solver_factory.getGlobalMultiComm(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_x_map() const -{ - return sg_solver->get_x_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_f_map() const -{ - return sg_solver->get_f_map(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_p_map(int l) const -{ - return sg_solver->get_p_map(l); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_g_map(int j) const -{ - return sg_solver->get_g_map(j); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_x_init() const -{ - return sg_solver->get_x_init(); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::get_p_init(int l) const -{ - return sg_solver->get_p_init(l); -} - -EpetraExt::ModelEvaluator::InArgs -Piro::Epetra::StokhosSolver::createInArgs() const -{ - return sg_solver->createInArgs(); -} - -EpetraExt::ModelEvaluator::OutArgs -Piro::Epetra::StokhosSolver::createOutArgs() const -{ - return sg_solver->createOutArgs(); -} - -void -Piro::Epetra::StokhosSolver::evalModel(const InArgs& inArgs, - const OutArgs& outArgs ) const -{ - sg_solver->evalModel(inArgs, outArgs); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::create_g_sg(int l, Epetra_DataAccess CV, - const Epetra_Vector* v) const -{ - OutArgs outargs = sg_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - Piro::Epetra::StokhosSolverFactory::SG_METHOD sg_method = - sg_solver_factory.getSGMethod(); - if (sg_method != Piro::Epetra::StokhosSolverFactory::SG_NI && - sg_method != Piro::Epetra::StokhosSolverFactory::SG_MPNI && - piroParams->get("Solver Type") == "NOX" && l == ng) { - return sg_nonlin_model->create_x_sg(CV, v); - } - else - return sg_nonlin_model->create_g_sg(l, CV, v); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolver::create_g_mv_sg(int l, int num_vecs, - Epetra_DataAccess CV, - const Epetra_MultiVector* v) const -{ - OutArgs outargs = sg_nonlin_model->createOutArgs(); - int ng = outargs.Ng(); - Piro::Epetra::StokhosSolverFactory::SG_METHOD sg_method = - sg_solver_factory.getSGMethod(); - if (sg_method != Piro::Epetra::StokhosSolverFactory::SG_NI && - sg_method != Piro::Epetra::StokhosSolverFactory::SG_MPNI && - piroParams->get("Solver Type") == "NOX" && l == ng) { - return sg_nonlin_model->create_x_mv_sg(num_vecs, CV, v); - } - else - return sg_nonlin_model->create_g_mv_sg(l, num_vecs, CV, v); -} - diff --git a/packages/piro/src/Piro_Epetra_StokhosSolver.hpp b/packages/piro/src/Piro_Epetra_StokhosSolver.hpp deleted file mode 100644 index 25c943338b3a..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolver.hpp +++ /dev/null @@ -1,154 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOS_SOLVER_H -#define PIRO_EPETRA_STOKHOS_SOLVER_H - -#include "EpetraExt_ModelEvaluator.h" -#include "Piro_Epetra_StokhosSolverFactory.hpp" - -namespace Piro { -namespace Epetra { - - class StokhosSolver : public EpetraExt::ModelEvaluator { - public: - - /** \name Constructors/initializers */ - //@{ - - //! Constructor - StokhosSolver(const Teuchos::RCP& piroParams, - const Teuchos::RCP& globalComm); - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Setup rest of model evaluator - void setup(const Teuchos::RCP& model, - const Teuchos::RCP& noxObserver = Teuchos::null); - - //! Reset Stokhos solver parameters - void resetSolverParameters(const Teuchos::ParameterList& new_solver_params); - - - //@} - - ~StokhosSolver(); - - - /** \name Overridden from EpetraExt::ModelEvaluator . */ - //@{ - - /** \brief . */ - Teuchos::RCP get_p_map(int l) const; - - /** \brief . */ - Teuchos::RCP get_g_map(int j) const; - - /** \brief . */ - Teuchos::RCP get_p_init(int l) const; - - /** \brief . */ - // Teuchos::RCP create_W() const; - /** \brief . */ - EpetraExt::ModelEvaluator::InArgs createInArgs() const; - /** \brief . */ - EpetraExt::ModelEvaluator::OutArgs createOutArgs() const; - /** \brief . */ - void evalModel( const InArgs& inArgs, const OutArgs& outArgs ) const; - - //@} - - /** \name Accessors */ - //@{ - - Teuchos::RCP > - getBasis() const { return sg_solver_factory.getBasis(); } - - Teuchos::RCP > - getQuad() const { return sg_solver_factory.getQuad(); } - - Teuchos::RCP - get_sg_model() const { return sg_nonlin_model; } - - //! Set initial solution polynomial - void set_x_sg_init(const Stokhos::EpetraVectorOrthogPoly& x_sg_in) { - sg_nonlin_model->set_x_sg_init(x_sg_in); - } - - //! Return initial SG x - Teuchos::RCP - get_x_sg_init() const { - return sg_nonlin_model->get_x_sg_init(); - } - - //! Set initial parameter polynomial - void set_p_sg_init(int i, const Stokhos::EpetraVectorOrthogPoly& p_sg_in) { - sg_nonlin_model->set_p_sg_init(i, p_sg_in); - } - - //! Get initial parameter polynomial - Teuchos::RCP - get_p_sg_init(int l) const { - return sg_nonlin_model->get_p_sg_init(l); - } - - //! Create vector orthog poly using x map and owned sg map - Teuchos::RCP - create_x_sg(Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return sg_nonlin_model->create_x_sg(CV, v); - } - - //! Create vector orthog poly using p map - Teuchos::RCP - create_p_sg(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const { - return sg_nonlin_model->create_p_sg(l, CV, v); - } - - //! Create vector orthog poly using g map - Teuchos::RCP - create_g_sg(int l, Epetra_DataAccess CV = Copy, - const Epetra_Vector* v = NULL) const; - - //! Create multi-vector orthog poly using g map - Teuchos::RCP - create_g_mv_sg(int l, int num_vecs, Epetra_DataAccess CV = Copy, - const Epetra_MultiVector* v = NULL) const; - - //@} - - private: - - /** \brief . */ - Teuchos::RCP get_x_map() const; - /** \brief . */ - Teuchos::RCP get_f_map() const; - /** \brief . */ - Teuchos::RCP get_x_init() const; - - private: - - Teuchos::RCP piroParams; - Piro::Epetra::StokhosSolverFactory sg_solver_factory; - Teuchos::RCP sg_nonlin_model; - Teuchos::RCP sg_solver; - - }; - -} -} -#endif diff --git a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp b/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp deleted file mode 100644 index c54f76ccec49..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.cpp +++ /dev/null @@ -1,552 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#include "Piro_Epetra_StokhosSolver.hpp" - -#include "Piro_Epetra_SolverFactory.hpp" -#include "Piro_Provider.hpp" - -#include "Stokhos.hpp" -#include "Stokhos_Epetra.hpp" -#include "Stokhos_SGModelEvaluator.hpp" -#include "Stokhos_SGModelEvaluator_Interlaced.hpp" -#include "Stokhos_SGModelEvaluator_Adaptive.hpp" - -#include "Teuchos_VerboseObjectParameterListHelpers.hpp" - -#include "NOX_Epetra_ModelEvaluatorInterface.H" -#include "NOX_Epetra_LinearSystem_Stratimikos.H" -#include "NOX_Epetra_LinearSystem_MPBD.hpp" -#include "NOX_Epetra_LinearSystem_SGGS.hpp" -#include "NOX_Epetra_LinearSystem_SGJacobi.hpp" - -Piro::Epetra::StokhosSolverFactory:: -StokhosSolverFactory(const Teuchos::RCP& piroParams_, - const Teuchos::RCP& globalComm) : - piroParams(piroParams_) -{ - // Setup VerboseObject - Teuchos::readVerboseObjectSublist(piroParams.get(), this); - Teuchos::RCP out = this->getOStream(); - Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel(); - - // Validate parameters - Teuchos::ParameterList& sgParams = - piroParams->sublist("Stochastic Galerkin"); - sgParams.validateParameters(*getValidSGParameters(),0); - - sgSolverParams = - //Teuchos::rcp(&(sgParams.sublist("SG Solver Parameters")),false); - Teuchos::rcp(new Teuchos::ParameterList(sgParams.sublist("SG Solver Parameters"))); - - // Get SG expansion type - std::string sg_type = sgParams.get("SG Method", "Direct"); - if (sg_type == "Direct" || sg_type == "AD") - sg_method = SG_AD; - else if (sg_type == "Global") - sg_method = SG_GLOBAL; - else if (sg_type == "Non-intrusive") - sg_method = SG_NI; - else if (sg_type == "Multi-point Non-intrusive") - sg_method = SG_MPNI; - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! Piro::Epetra::StokhosSolverFactor(): " << - "Invalid SG Method " << sg_type << std::endl); - - // Get SG ME type - std::string sg_me_type = sgParams.get("SG ModelEvaluator Method", "Default"); - if (sg_me_type == "Default") - sg_me_method = SG_ME_DEFAULT; - else if (sg_me_type == "Interlaced") - sg_me_method = SG_ME_INTERLACED; - else if (sg_me_type == "Adaptive") - sg_me_method = SG_ME_ADAPTIVE; - else - TEUCHOS_TEST_FOR_EXCEPTION( - true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! Piro::Epetra::StokhosSolverFactor(): " << - "Invalid SG ModelEvaluator Method " << sg_me_type << std::endl); - - // Create SG basis - basis = Stokhos::BasisFactory::create(sgParams); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Basis size = " << basis->size() << std::endl; - - // Create SG Quadrature - Teuchos::ParameterList& expParams = sgParams.sublist("Expansion"); - std::string exp_type = expParams.get("Type", "Quadrature"); - if (exp_type == "Quadrature" || - sg_method == SG_GLOBAL || - sg_method == SG_NI || - sg_method == SG_MPNI) { - quad = Stokhos::QuadratureFactory::create(sgParams); - if (verbLevel != Teuchos::VERB_NONE) - *out << "Quadrature size = " << quad->size() << std::endl; - } - - // Create SG expansion & triple-product - if (sg_method != SG_NI && sg_method != SG_MPNI) { - expansion = - Stokhos::ExpansionFactory::create(sgParams); - Cijk = - sgParams.get< Teuchos::RCP > >("Triple Product Tensor"); - } - - // Create stochastic parallel distribution - int num_spatial_procs = - sgParams.get("Number of Spatial Processors", -1); - int num_stoch_blocks; - if (sg_method == SG_MPNI) - num_stoch_blocks = quad->size(); - else - num_stoch_blocks = basis->size(); - sg_comm = - Stokhos::buildMultiComm(*globalComm, num_stoch_blocks, num_spatial_procs); - sg_parallel_data = - Teuchos::rcp(new Stokhos::ParallelData(basis, Cijk, sg_comm, sgParams)); - -} - -void -Piro::Epetra::StokhosSolverFactory:: -resetSolverParameters(const Teuchos::ParameterList& new_solver_params) -{ - *sgSolverParams = new_solver_params; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGModel(const Teuchos::RCP& model_) -{ - Teuchos::ParameterList& sgParams = - piroParams->sublist("Stochastic Galerkin"); - sgParams.sublist("Basis"); - - model = model_; - - // Set up stochastic Galerkin model - Teuchos::RCP sg_model; - if (sg_method == SG_AD) { - sg_model = model; - } - else if (sg_method == SG_MPNI) { - int num_mp = quad->size(); - Teuchos::RCP mp_comm = - Stokhos::getStochasticComm(sg_comm); - Teuchos::RCP mp_block_map = - Teuchos::rcp(new Epetra_Map(num_mp, 0, *mp_comm)); - Teuchos::RCP mp_model = model; - - // Turn mp_model into an MP-nonlinear problem - Teuchos::RCP mpParams = - Teuchos::rcp(&(sgParams.sublist("MP Solver Parameters")),false); - Teuchos::RCP mp_nonlinear_model = - Teuchos::rcp(new Stokhos::MPModelEvaluator(mp_model, sg_comm, - mp_block_map, mpParams)); - - bool use_mpbd_solver = mpParams->get("Use MPBD Solver", false); - Teuchos::RCP linsys; - Teuchos::RCP nox_interface; - if (use_mpbd_solver) { - nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(mp_nonlinear_model)); - Teuchos::RCP A = - mp_nonlinear_model->create_W(); - Teuchos::RCP M = - mp_nonlinear_model->create_WPrec()->PrecOp; - Teuchos::RCP iReq = - nox_interface; - Teuchos::RCP iJac = - nox_interface; - Teuchos::RCP iPrec = - nox_interface; - - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& noxstratlsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - Teuchos::ParameterList& mpbdParams = - mpParams->sublist("MPBD Linear Solver"); - mpbdParams.sublist("Deterministic Solver Parameters") = - noxstratlsParams; - Teuchos::RCP inner_A = model->create_W(); - Teuchos::RCP inner_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - Teuchos::RCP inner_iReq = - inner_nox_interface; - Teuchos::RCP inner_iJac = - inner_nox_interface; - Teuchos::RCP inner_u = model->get_x_init(); - Teuchos::RCP inner_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - printParams, - noxstratlsParams, - inner_iJac, inner_A, *inner_u)); - linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemMPBD(printParams, - mpbdParams, - inner_linsys, - iReq, iJac, A, - model->get_x_map())); - } - - Piro::Epetra::SolverFactory solverFactory; - solverFactory.setSource(nox_interface); - solverFactory.setSource(linsys); - - // Create solver to map p -> g - const Teuchos::RCP mp_solver - = solverFactory.createSolver(piroParams, mp_nonlinear_model); - - // Create MP inverse model evaluator to map p_mp -> g_mp - Teuchos::Array mp_p_index_map = - mp_nonlinear_model->get_p_mp_map_indices(); - Teuchos::Array mp_g_index_map = - mp_nonlinear_model->get_g_mp_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - mp_nonlinear_model->get_g_mp_base_maps(); - mp_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - Teuchos::RCP mp_inverse_solver = - Teuchos::rcp(new Stokhos::MPInverseModelEvaluator(mp_solver, - mp_p_index_map, - mp_g_index_map, - base_g_maps)); - - // Create MP-based SG Quadrature model evaluator to calculate g_sg - sg_model = - Teuchos::rcp(new Stokhos::SGQuadMPModelEvaluator(mp_inverse_solver, - sg_comm, - mp_block_map)); - } - else { - Teuchos::RCP underlying_model; - if (sg_method == SG_GLOBAL) { - underlying_model = model; - } else { - Piro::Epetra::SolverFactory solverFactory; - underlying_model = solverFactory.createSolver(piroParams, model); - } - sg_model = - Teuchos::rcp(new Stokhos::SGQuadModelEvaluator(underlying_model)); - } - - // Set up SG nonlinear model - if (sg_me_method == SG_ME_DEFAULT) - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator(sg_model, basis, - quad, expansion, - sg_parallel_data, - sgSolverParams)); - else if (sg_me_method == SG_ME_INTERLACED) - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator_Interlaced(sg_model, basis, - quad, expansion, - sg_parallel_data, - sgSolverParams)); - else { - - // Get row basis vector - Teuchos::ParameterList& sgAdaptParams = - piroParams->sublist("Stochastic Galerkin").sublist("Adaptivity"); - typedef Teuchos::RCP< std::vector > > > row_basis_vec_type; - row_basis_vec_type row_basis_vec ; - if (sgAdaptParams.isParameter("Adaptive Basis Vector")) - row_basis_vec = - sgAdaptParams.get("Adaptive Basis Vector"); - - // If it isn't supplied, fill it with a uniform basis - if (row_basis_vec == Teuchos::null) { - row_basis_vec = - Teuchos::rcp(new std::vector > >(sg_model->get_x_map()->NumMyElements(), - Teuchos::rcp_dynamic_cast >(basis))); - } - - sg_nonlin_model = - Teuchos::rcp(new Stokhos::SGModelEvaluator_Adaptive(sg_model, basis, - *row_basis_vec, - quad, expansion, - sg_parallel_data, - false,-1, - sgSolverParams)); - } - - // Set up stochastic parameters - // One sublist for each stochastic parameter *vector*, and each parameter - // vector can provide an initial set of expansion coefficients in the basis. - // This decouples the stochastic parameters from the SG basis allowing e.g., - // more stochastic parameters than fundamental r.v.'s in the basis - // (for correlation) or fewer. - Teuchos::ParameterList& sgParameters = sgParams.sublist("SG Parameters"); - bool set_initial_params = sgParameters.get("Set Initial SG Parameters", true); - if (set_initial_params) { - int num_param_vectors = - sgParameters.get("Number of SG Parameter Vectors", 1); - Teuchos::Array point(basis->dimension(), 1.0); - Teuchos::Array basis_vals(basis->size()); - basis->evaluateBases(point, basis_vals); - int idx=0; - for (int i=0; i sg_p = - sg_nonlin_model->create_p_sg(p_vec); - - // Initalize sg parameter vector - int num_params = sg_p->coefficientMap()->NumMyElements(); - for (int j=0; j initial_p_vals; - initial_p_vals = pList.get(ss2.str(),initial_p_vals); - if (initial_p_vals.size() == 0) { - // Default to mean-zero linear expansion, ie, p_j = \xi_j, - // by setting term j+1 to 1 (unnormalized) - (*sg_p)[idx+1][j] = 1.0 / basis_vals[idx+1]; - } - else - for (Teuchos::Array::size_type l=0; lset_p_sg_init(p_vec, *sg_p); - } - } - - // Setup stochastic initial guess - if (sg_method != SG_NI && sg_method != SG_MPNI) { - Teuchos::RCP sg_x = - sg_nonlin_model->create_x_sg(); - sg_x->init(0.0); - if (sg_x->myGID(0)) - (*sg_x)[0] = *(model->get_x_init()); - sg_nonlin_model->set_x_sg_init(*sg_x); - } - - return sg_nonlin_model; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGObserver(const Teuchos::RCP& noxObserver) -{ - // Set up Observer to call noxObserver for each vector block - Teuchos::RCP sgnoxObserver; - - Teuchos::ParameterList& sgParams = piroParams->sublist("Stochastic Galerkin"); - if (noxObserver != Teuchos::null && sg_method != SG_NI && sg_method != SG_MPNI) { - int save_moments = sgParams.get("Save Moments",-1); - sgnoxObserver = - Teuchos::rcp(new Piro::Epetra::StokhosNOXObserver( - noxObserver, basis, - sg_nonlin_model->get_overlap_stochastic_map(), - model->get_x_map(), - sg_nonlin_model->get_x_sg_overlap_map(), - sg_comm, sg_nonlin_model->get_x_sg_importer(), save_moments)); - } - - return sgnoxObserver; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGSolver(const Teuchos::RCP& sg_model, - const Teuchos::RCP& sg_observer) -{ - // Get SG solver type - std::string solve_type = sgSolverParams->get("SG Solver Algorithm", "Krylov"); - SG_SOLVER solve_method; - if (solve_type == "Krylov") - solve_method = SG_KRYLOV; - else if (solve_type == "Gauss-Seidel") - solve_method = SG_GS; - else if (solve_type == "Jacobi") - solve_method = SG_JACOBI; - else - TEUCHOS_TEST_FOR_EXCEPTION(true, Teuchos::Exceptions::InvalidParameter, - std::endl << "Error! ENAT_SGNOXSolver(): " << - "Invalid Solver Algorithm " << solve_type << std::endl); - - Teuchos::RCP sg_block_solver; - if (sg_method != SG_NI && sg_method != SG_MPNI) { - Piro::Epetra::SolverFactory solverFactory; - - Teuchos::RCP sg_linsys = Teuchos::null; - if (solve_method==SG_GS || solve_method==SG_JACOBI) { - // Create NOX interface - Teuchos::RCP det_nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(model)); - - // Create NOX linear system object - Teuchos::RCP det_u = model->get_x_init(); - Teuchos::RCP det_A = model->create_W(); - Teuchos::RCP det_iReq = det_nox_interface; - Teuchos::RCP det_iJac = det_nox_interface; - //Teuchos::ParameterList det_printParams; - Teuchos::ParameterList& noxParams = piroParams->sublist("NOX"); - Teuchos::ParameterList& det_printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& printParams = noxParams.sublist("Printing"); - Teuchos::ParameterList& newtonParams = - noxParams.sublist("Direction").sublist("Newton"); - Teuchos::ParameterList& det_lsParams = - newtonParams.sublist("Stratimikos Linear Solver"); - - Teuchos::RCP det_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemStratimikos( - det_printParams, det_lsParams, det_iJac, - det_A, *det_u)); - - // Sublist for linear solver for the Newton method - //Teuchos::ParameterList& lsParams = newtonParams.sublist("Linear Solver"); - Teuchos::ParameterList& sgjacobiParams = - newtonParams.sublist("Linear Solver"); - // Create NOX interface - Teuchos::RCP nox_interface = - Teuchos::rcp(new NOX::Epetra::ModelEvaluatorInterface(sg_model)); - Teuchos::RCP base_map = model->get_x_map(); - Teuchos::RCP sg_map = sg_model->get_x_map(); - Teuchos::RCP A = sg_model->create_W(); - Teuchos::RCP iReq = nox_interface; - Teuchos::RCP iJac = nox_interface; - - if (solve_method==SG_GS) { - sgjacobiParams.sublist("Deterministic Solver Parameters") = det_lsParams; - - sg_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemSGGS( - printParams, sgjacobiParams, det_linsys, iReq, iJac, - basis, sg_parallel_data, A, base_map, sg_map)); - } - - else if (solve_method==SG_JACOBI) { - sgjacobiParams.sublist("Deterministic Solver Parameters") = det_lsParams; - Teuchos::ParameterList& jacobiOpParams = - sgjacobiParams.sublist("Jacobi SG Operator"); - jacobiOpParams.set("Only Use Linear Terms", true); - sg_linsys = - Teuchos::rcp(new NOX::Epetra::LinearSystemSGJacobi( - printParams, sgjacobiParams, det_linsys, iReq, iJac, - basis, sg_parallel_data, A, base_map, sg_map)); - } - - solverFactory.setSource(sg_linsys); - } - - solverFactory.setSource(sg_observer); - - // Will find preconditioner for Matrix-Free method - sg_block_solver = solverFactory.createSolver(piroParams, sg_model); - } - else - sg_block_solver = sg_model; - - return sg_block_solver; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createSGSolverAdapter(const Teuchos::RCP& sg_solver) -{ - // Create SG Inverse model evaluator - Teuchos::Array sg_p_index_map = sg_nonlin_model->get_p_sg_map_indices(); - Teuchos::Array sg_g_index_map = sg_nonlin_model->get_g_sg_map_indices(); - Teuchos::Array< Teuchos::RCP > base_g_maps = - sg_nonlin_model->get_g_sg_base_maps(); - // Add sg_u response function supplied by Piro::Epetra::NOXSolver - if (sg_method != SG_NI && sg_method != SG_MPNI && - piroParams->get("Solver Type") == "NOX") { - sg_g_index_map.push_back(base_g_maps.size()); - base_g_maps.push_back(model->get_x_map()); - } - Teuchos::RCP sg_adapter = - Teuchos::rcp(new Stokhos::SGInverseModelEvaluator(sg_solver, - sg_p_index_map, - sg_g_index_map, - base_g_maps)); - - return sg_adapter; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -createRSModel(const Teuchos::RCP& sg_model) -{ - // Create ResponseStatistic model evaluator - Teuchos::Array< Teuchos::RCP > base_g_maps = - sg_nonlin_model->get_g_sg_base_maps(); - // Add sg_u response function supplied by Piro::Epetra::NOXSolver - if (sg_method != SG_NI && sg_method != SG_MPNI && - piroParams->get("Solver Type", "NOX") == "NOX") { - base_g_maps.push_back(model->get_x_map()); - } - Teuchos::RCP block_map = - sg_nonlin_model->get_overlap_stochastic_map(); - Teuchos::RCP rs_model = - Teuchos::rcp(new Stokhos::ResponseStatisticModelEvaluator( - sg_model, base_g_maps, basis, sg_comm, block_map)); - - return rs_model; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getSpatialComm() const -{ - return Stokhos::getSpatialComm(sg_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getStochasticComm() const -{ - return Stokhos::getStochasticComm(sg_comm); -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory:: -getGlobalMultiComm() const -{ - return sg_comm; -} - -Teuchos::RCP -Piro::Epetra::StokhosSolverFactory::getValidSGParameters() const -{ - Teuchos::RCP validPL = - Teuchos::rcp(new Teuchos::ParameterList("ValidSGParams"));; - validPL->sublist("SG Parameters", false, ""); - validPL->sublist("SG Solver Parameters", false, ""); - validPL->sublist("MP Solver Parameters", false, ""); - validPL->sublist("Basis", false, ""); - validPL->sublist("Pseudospectral Operator", false, ""); - validPL->sublist("Expansion", false, ""); - validPL->sublist("Quadrature", false, ""); - validPL->set("SG Method", "",""); - validPL->set("SG ModelEvaluator Method", "",""); - validPL->set("Triple Product Size", "",""); - validPL->set("Rebalance Stochastic Graph", false, ""); - validPL->set("Save Moments", -1, "Set to 2 for Mean and Variance. Default writes Coeffs"); - validPL->set("Number of Spatial Processors", -1, ""); - validPL->sublist("Isorropia", false, ""); - validPL->sublist("Response KL", false, ""); - validPL->sublist("Adaptivity", false, ""); - - return validPL; -} diff --git a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp b/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp deleted file mode 100644 index 7a9c6de6c171..000000000000 --- a/packages/piro/src/Piro_Epetra_StokhosSolverFactory.hpp +++ /dev/null @@ -1,148 +0,0 @@ -// @HEADER -// ***************************************************************************** -// Piro: Strategy package for embedded analysis capabilitites -// -// Copyright 2010 NTESS and the Piro contributors. -// SPDX-License-Identifier: BSD-3-Clause -// ***************************************************************************** -// @HEADER - -#ifndef PIRO_EPETRA_STOKHOS_SOLVER_FACTORY_H -#define PIRO_EPETRA_STOKHOS_SOLVER_FACTORY_H - -#include "EpetraExt_ModelEvaluator.h" -#include "Teuchos_VerboseObject.hpp" -#include "Piro_Epetra_StokhosNOXObserver.hpp" - -#include "Stokhos_SGModelEvaluatorBase.hpp" -#include "Stokhos_SGInverseModelEvaluator.hpp" -#include "Stokhos_OrthogPolyBasis.hpp" -#include "Stokhos_Quadrature.hpp" -#include "Stokhos_OrthogPolyExpansion.hpp" -#include "Stokhos_Sparse3Tensor.hpp" -#include "Stokhos_ParallelData.hpp" -#include "EpetraExt_MultiComm.h" - -namespace Piro { -namespace Epetra { - - class StokhosSolverFactory : - public Teuchos::VerboseObject { - public: - - //! SG method - enum SG_METHOD { - SG_AD, - SG_GLOBAL, - SG_NI, - SG_MPNI - }; - - //! SG ModelEvaluator method - enum SG_ME_METHOD { - SG_ME_DEFAULT, - SG_ME_INTERLACED, - SG_ME_ADAPTIVE - }; - - //! Constructor - StokhosSolverFactory(const Teuchos::RCP& piroParams, - const Teuchos::RCP& globalComm); - - //! Reset Stokhos solver parameters - void resetSolverParameters(const Teuchos::ParameterList& new_solver_params); - - /** \name Factory methods */ - //@{ - - //! Create stochastic model evaluator - Teuchos::RCP createSGModel( - const Teuchos::RCP& model); - - //! Create stochastic observer - Teuchos::RCP createSGObserver( - const Teuchos::RCP& noxObserver); - - //! Create stochastic solver - Teuchos::RCP createSGSolver( - const Teuchos::RCP& sg_model, - const Teuchos::RCP& sg_observer = Teuchos::null); - - //! Create stochastic solver adapter - Teuchos::RCP createSGSolverAdapter( - const Teuchos::RCP& sg_solver); - - //! Create response statistic model evaluator - Teuchos::RCP createRSModel( - const Teuchos::RCP& sg_model); - - //@} - - /** \name Accessors */ - //@{ - - //! Get spatial comm - Teuchos::RCP getSpatialComm() const; - - //! Get stochastic comm - Teuchos::RCP getStochasticComm() const; - - //! Get global multi-comm - Teuchos::RCP getGlobalMultiComm() const; - - //! Get stochastic basis - Teuchos::RCP > - getBasis() const { return basis; } - - //! Get quadrature rule - Teuchos::RCP > - getQuad() const { return quad; } - - //! Get SG method - SG_METHOD getSGMethod() const { return sg_method; } - - //! Get SG ME method - SG_ME_METHOD getSGMEMethod() const { return sg_me_method; } - - Teuchos::RCP > - getExpansion() const { return expansion; } - - Teuchos::RCP getParallelData() const - { return sg_parallel_data; } - - //@} - - private: - - //! Get valid parameters - Teuchos::RCP - getValidSGParameters() const; - - private: - - enum SG_SOLVER { - SG_KRYLOV, - SG_GS, - SG_JACOBI - }; - - Teuchos::RCP piroParams; - Teuchos::RCP sgSolverParams; - - SG_METHOD sg_method; - SG_ME_METHOD sg_me_method; - Teuchos::RCP > basis; - Teuchos::RCP > quad; - Teuchos::RCP > expansion; - Teuchos::RCP > Cijk; - Teuchos::RCP sg_comm; - Teuchos::RCP sg_parallel_data; - - Teuchos::RCP model; - Teuchos::RCP sg_nonlin_model; - - }; - -} -} -#endif diff --git a/packages/piro/test/MockModelEval_C.cpp b/packages/piro/test/MockModelEval_C.cpp index f6acef19d2e7..20cb651ffc32 100644 --- a/packages/piro/test/MockModelEval_C.cpp +++ b/packages/piro/test/MockModelEval_C.cpp @@ -13,10 +13,6 @@ #include "Epetra_LocalMap.h" #include "Epetra_CrsMatrix.h" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#endif - using Teuchos::RCP; using Teuchos::rcp; @@ -146,15 +142,6 @@ MockModelEval_C::createInArgs() const inArgs.set_Np(1); inArgs.setSupports(IN_ARG_x, true); -#ifdef HAVE_PIRO_STOKHOS - inArgs.setSupports(IN_ARG_x_sg, true); - inArgs.setSupports(IN_ARG_x_dot_sg, true); - inArgs.setSupports(IN_ARG_p_sg, 0, true); // 1 SG parameter vector - inArgs.setSupports(IN_ARG_sg_basis, true); - inArgs.setSupports(IN_ARG_sg_quadrature, true); - inArgs.setSupports(IN_ARG_sg_expansion, true); -#endif - return inArgs; } @@ -173,15 +160,6 @@ MockModelEval_C::createOutArgs() const outArgs.setSupports(OUT_ARG_DgDx, 0, DERIV_TRANS_MV_BY_ROW); outArgs.setSupports(OUT_ARG_DgDp, 0, 0, DERIV_MV_BY_COL); -#ifdef HAVE_PIRO_STOKHOS - outArgs.setSupports(OUT_ARG_f_sg, true); - outArgs.setSupports(OUT_ARG_W_sg, true); - outArgs.setSupports(OUT_ARG_g_sg, 0, true); - outArgs.setSupports(OUT_ARG_DfDp_sg, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDx_sg, 0, DERIV_TRANS_MV_BY_ROW); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 0, DERIV_MV_BY_COL); -#endif - return outArgs; } @@ -253,86 +231,4 @@ MockModelEval_C::evalModel(const InArgs& inArgs, const OutArgs& outArgs) const (*dgdp)[0][0] = p; } } - - // - // Stochastic calculation - // - -#ifdef HAVE_PIRO_STOKHOS - // Parse InArgs - RCP > basis = - inArgs.get_sg_basis(); - RCP > expn = - inArgs.get_sg_expansion(); - InArgs::sg_const_vector_t x_sg = inArgs.get_x_sg(); - InArgs::sg_const_vector_t p_sg = inArgs.get_p_sg(0); - - Stokhos::OrthogPolyApprox x(basis), x2(basis); - if (x_sg != Teuchos::null && proc == 0) { - for (int i=0; isize(); i++) { - x[i] = (*x_sg)[i][0]; - } - expn->times(x2, x, x); - } - - Stokhos::OrthogPolyApprox p(basis), p2(basis); - if (p_sg != Teuchos::null) { - for (int i=0; isize(); i++) { - p[i] = (*p_sg)[i][0]; - } - expn->times(p2, p, p); - } - - // Parse OutArgs - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*f_sg)[block][0] = 0.5*(x2[block] - p2[block]); - } - } - - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - Teuchos::RCP W = - Teuchos::rcp_dynamic_cast(W_sg->getCoeffPtr(block), - true); - int i = 0; - int ret = W->ReplaceMyValues(i, 1, &x[block], &i); - if (ret != 0) - std::cout << "ReplaceMyValues returned " << ret << "!" << std::endl; - } - } - - RCP dfdp_sg = - outArgs.get_DfDp_sg(0).getMultiVector(); - if (dfdp_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dfdp_sg)[block][0][0] = -p[block]; - } - } - - OutArgs::sg_vector_t g_sg = outArgs.get_g_sg(0); - if (g_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*g_sg)[block][0] = 0.5*(x2[block] + p2[block]); - } - } - - RCP dgdx_sg = - outArgs.get_DgDx_sg(0).getMultiVector(); - if (dgdx_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dgdx_sg)[block][0][0] = x[block]; - } - } - - RCP dgdp_sg = - outArgs.get_DgDp_sg(0,0).getMultiVector(); - if (dgdp_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*dgdp_sg)[block][0][0] = p[block]; - } - } -#endif } diff --git a/packages/piro/test/MockModelEval_D.cpp b/packages/piro/test/MockModelEval_D.cpp index 14a3ed668f8b..3cdcfdc29b7f 100644 --- a/packages/piro/test/MockModelEval_D.cpp +++ b/packages/piro/test/MockModelEval_D.cpp @@ -13,10 +13,6 @@ #include "Epetra_LocalMap.h" #include "Epetra_CrsMatrix.h" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#endif - using Teuchos::RCP; using Teuchos::rcp; @@ -157,15 +153,6 @@ createInArgs() const inArgs.set_Np(2); inArgs.setSupports(IN_ARG_x, true); -#ifdef HAVE_PIRO_STOKHOS - inArgs.setSupports(IN_ARG_x_sg, true); - inArgs.setSupports(IN_ARG_p_sg, 0, true); - inArgs.setSupports(IN_ARG_p_sg, 1, true); - inArgs.setSupports(IN_ARG_sg_basis, true); - inArgs.setSupports(IN_ARG_sg_quadrature, true); - inArgs.setSupports(IN_ARG_sg_expansion, true); -#endif - return inArgs; } @@ -187,17 +174,6 @@ createOutArgs() const outArgs.setSupports(OUT_ARG_DgDp, 0, 0, DERIV_MV_BY_COL); outArgs.setSupports(OUT_ARG_DgDp, 0, 1, DERIV_MV_BY_COL); -#ifdef HAVE_PIRO_STOKHOS - outArgs.setSupports(OUT_ARG_f_sg, true); - outArgs.setSupports(OUT_ARG_W_sg, true); - outArgs.setSupports(OUT_ARG_g_sg, 0, true); - outArgs.setSupports(OUT_ARG_DfDp_sg, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DfDp_sg, 1, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDx_sg, 0, DERIV_TRANS_MV_BY_ROW); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 0, DERIV_MV_BY_COL); - outArgs.setSupports(OUT_ARG_DgDp_sg, 0, 1, DERIV_MV_BY_COL); -#endif - return outArgs; } @@ -282,96 +258,4 @@ evalModel(const InArgs& inArgs, const OutArgs& outArgs) const (*dgdp2)[0][0] = 0.0; } } - - // - // Stochastic calculation - // - -#ifdef HAVE_PIRO_STOKHOS - // Parse InArgs - RCP > basis = - inArgs.get_sg_basis(); - RCP > expn = - inArgs.get_sg_expansion(); - InArgs::sg_const_vector_t x_sg = inArgs.get_x_sg(); - InArgs::sg_const_vector_t p1_sg = inArgs.get_p_sg(0); - InArgs::sg_const_vector_t p2_sg = inArgs.get_p_sg(1); - - // Parse OutArgs - OutArgs::sg_vector_t f_sg = outArgs.get_f_sg(); - if (f_sg != Teuchos::null && proc == 0) { - for (int block=0; blocksize(); block++) { - (*f_sg)[block][0] = - (*x_sg)[block][0] - (*p1_sg)[block][0] + (*p2_sg)[block][0]; - } - } - - OutArgs::sg_operator_t W_sg = outArgs.get_W_sg(); - if (W_sg != Teuchos::null) { - W_sg->init(0.0); - Teuchos::RCP W = - Teuchos::rcp_dynamic_cast(W_sg->getCoeffPtr(0), - true); - if (proc == 0) { - int i = 0; - double val = 1.0; - W->ReplaceMyValues(i, 1, &val, &i); - } - } - - RCP dfdp1_sg = - outArgs.get_DfDp_sg(0).getMultiVector(); - if (dfdp1_sg != Teuchos::null) { - dfdp1_sg->init(0.0); - if (proc == 0) { - (*dfdp1_sg)[0][0][0] = -1.0; - } - } - RCP dfdp2_sg = - outArgs.get_DfDp_sg(1).getMultiVector(); - if (dfdp2_sg != Teuchos::null) { - dfdp2_sg->init(0.0); - if (proc == 0) { - (*dfdp2_sg)[0][0][0] = 1.0; - } - } - - Stokhos::OrthogPolyApprox x(basis); - if (x_sg != Teuchos::null && proc == 0) { - for (int i=0; isize(); i++) { - x[i] = (*x_sg)[i][0]; - } - } - - OutArgs::sg_vector_t g_sg = outArgs.get_g_sg(0); - if (g_sg != Teuchos::null && proc == 0) { - Stokhos::OrthogPolyApprox xinv(basis); - expn->divide(xinv, 1.0, x); - for (int block=0; blocksize(); block++) { - (*g_sg)[block][0] = xinv[block]; - } - } - - RCP dgdx_sg = - outArgs.get_DgDx_sg(0).getMultiVector(); - if (dgdx_sg != Teuchos::null && proc == 0) { - Stokhos::OrthogPolyApprox x2(basis), x2inv(basis); - expn->times(x2, x, x); - expn->divide(x2inv, -1.0, x2); - for (int block=0; blocksize(); block++) { - (*dgdx_sg)[block][0][0] = x2inv[block]; - } - } - - RCP dgdp1_sg = - outArgs.get_DgDp_sg(0,0).getMultiVector(); - if (dgdp1_sg != Teuchos::null) { - dgdp1_sg->init(0.0); - } - RCP dgdp2_sg = - outArgs.get_DgDp_sg(0,1).getMultiVector(); - if (dgdp2_sg != Teuchos::null) { - dgdp2_sg->init(0.0); - } -#endif } diff --git a/packages/piro/test/Piro_UnitTests.cpp b/packages/piro/test/Piro_UnitTests.cpp index fb58dd0be75a..c3a323a31090 100644 --- a/packages/piro/test/Piro_UnitTests.cpp +++ b/packages/piro/test/Piro_UnitTests.cpp @@ -15,20 +15,6 @@ #include "Piro_ConfigDefs.hpp" #ifdef HAVE_PIRO_NOX #include "Piro_Epetra_NOXSolver.hpp" -#ifdef HAVE_PIRO_STOKHOS -#include "Stokhos_Epetra.hpp" -#include "Piro_Epetra_StokhosSolverFactory.hpp" -#include "MockModelEval_C.hpp" - -#include "Piro_Epetra_StokhosSolver.hpp" -#include "Piro_Epetra_NECoupledModelEvaluator.hpp" -#include "MockModelEval_D.hpp" - -#include "Thyra_EpetraModelEvaluator.hpp" -#include "Piro_PerformAnalysis.hpp" -#include "Thyra_VectorBase.hpp" -#include "Thyra_DetachedVectorView.hpp" -#endif #endif #include "Piro_Epetra_SolverFactory.hpp" @@ -117,38 +103,6 @@ void testSensitivities(const std::string& inputFile, } } -#ifdef HAVE_PIRO_STOKHOS -int testResponses(const Epetra_Vector& g, - const Teuchos::Array testValues, - double absTol, double relTol, - const std::string& tag, - Teuchos::FancyOStream& out) -{ - int failures = 0; - TEUCHOS_TEST_FOR_EXCEPTION(g.MyLength() != testValues.size(), - std::logic_error, - tag << " Test Values array has size " << - testValues.size() << "but expected size " << - g.MyLength()); - for (int i=0; i default_out = - Teuchos::VerboseObjectBase::getDefaultOStream(); - Teuchos::VerboseObjectBase::setDefaultOStream(rcp(&out,false)); - - // Create a communicator for Epetra objects - RCP globalComm; -#ifdef HAVE_MPI - globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); -#else - globalComm = rcp(new Epetra_SerialComm); -#endif - - std::string xml_filename = "input_SGSolve.xml"; - - // Set up application parameters - RCP appParams = - Teuchos::getParametersFromXmlFile(xml_filename); - - // Create stochastic Galerkin solver factory - RCP piroParams = - rcp(&(appParams->sublist("Piro")),false); - setOStream(rcp(&out,false), *piroParams); - Piro::Epetra::StokhosSolverFactory sg_solver_factory(piroParams, - globalComm); - - // Get comm for spatial problem - RCP app_comm = sg_solver_factory.getSpatialComm(); - - // Create application model evaluator - RCP model = rcp(new MockModelEval_C(app_comm)); - - // Setup rest of solver - RCP sg_model = - sg_solver_factory.createSGModel(model); - RCP sg_solver = - sg_solver_factory.createSGSolver(sg_model); - RCP rs_model = - sg_solver_factory.createRSModel(sg_solver); - - // Evaluate SG responses at SG parameters - EpetraExt::ModelEvaluator::InArgs sg_inArgs = rs_model->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs sg_outArgs = - rs_model->createOutArgs(); - int p_index = 1; // PC expansion coefficients of params - int g_index = 0; - int num_g = 2; - int x_index = num_g-1; - int g_mean_index = g_index + num_g; - int g_var_index = g_index + 2*num_g; - RCP p_init = rs_model->get_p_init(p_index); - RCP g = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_index)))); - RCP x = - rcp(new Epetra_Vector(*(rs_model->get_g_map(x_index)))); - RCP g_mean = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_mean_index)))); - RCP g_var = - rcp(new Epetra_Vector(*(rs_model->get_g_map(g_var_index)))); - RCP dgdp_mean = - rcp(new Epetra_MultiVector( - *(rs_model->get_p_map(p_index)), - rs_model->get_g_map(g_mean_index)->NumMyElements())); - RCP dgdp_var = - rcp(new Epetra_MultiVector( - *(rs_model->get_p_map(p_index)), - rs_model->get_g_map(g_var_index)->NumMyElements())); - - sg_outArgs.set_g(g_index, g); - sg_outArgs.set_g(x_index, x); - sg_outArgs.set_g(g_mean_index, g_mean); - sg_outArgs.set_g(g_var_index, g_var); - sg_outArgs.set_DgDp( - g_mean_index, p_index, - EpetraExt::ModelEvaluator::Derivative( - dgdp_mean, - EpetraExt::ModelEvaluator::DERIV_TRANS_MV_BY_ROW - ) - ); - sg_outArgs.set_DgDp( - g_var_index, p_index, - EpetraExt::ModelEvaluator::Derivative( - dgdp_var, - EpetraExt::ModelEvaluator::DERIV_TRANS_MV_BY_ROW - ) - ); - - rs_model->evalModel(sg_inArgs, sg_outArgs); - - // Test derivatives with finite differences - double delta = 1.0e-6; - int num_p = rs_model->get_p_map(p_index)->NumMyElements(); - int num_resp = model->get_g_map(g_index)->NumMyElements(); - Teuchos::RCP p_pert = - Teuchos::rcp(new Epetra_Vector((*rs_model->get_p_map(p_index)))); - Teuchos::RCP g_mean_pert = - Teuchos::rcp(new Epetra_Vector(*(rs_model->get_g_map(g_mean_index)))); - Teuchos::RCP g_var_pert = - Teuchos::rcp(new Epetra_Vector(*(rs_model->get_g_map(g_var_index)))); - Teuchos::RCP dgdp_mean_fd = - Teuchos::rcp(new Epetra_MultiVector(*(rs_model->get_p_map(p_index)), - num_resp)); - Teuchos::RCP dgdp_var_fd = - Teuchos::rcp(new Epetra_MultiVector(*(rs_model->get_p_map(p_index)), - num_resp)); - EpetraExt::ModelEvaluator::InArgs sg_inArgs_pert = - rs_model->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs sg_outArgs_pert = - rs_model->createOutArgs(); - sg_inArgs_pert.set_p(p_index, p_pert); - sg_outArgs_pert.set_g(g_mean_index, g_mean_pert); - sg_outArgs_pert.set_g(g_var_index, g_var_pert); - for (int i=0; iPutScalar(0.0); - g_var_pert->PutScalar(0.0); - - // Compute perturbed g - rs_model->evalModel(sg_inArgs_pert, sg_outArgs_pert); - - // Compute FD derivatives - for (int j=0; j piroParams1 = - Teuchos::getParametersFromXmlFile(problem1_filename); - setOStream(rcp(&out,false), *piroParams1); - RCP model1 = rcp(new MockModelEval_D(globalComm)); - - // Setup problem 2 - RCP piroParams2 = - Teuchos::getParametersFromXmlFile(problem2_filename); - setOStream(rcp(&out,false), *piroParams2); - RCP model2 = rcp(new MockModelEval_D(globalComm)); - - // Setup coupled model - RCP coupledParams = - Teuchos::getParametersFromXmlFile(coupled_filename); - setOStream(rcp(&out,false), *coupledParams); - Teuchos::Array< RCP > models(2); - models[0] = model1; models[1] = model2; - Teuchos::Array< RCP > piroParams(2); - piroParams[0] = piroParams1; piroParams[1] = piroParams2; - RCP network_model = - rcp(new Piro::Epetra::ParamToResponseNetworkModel); - RCP coupledModel = - rcp(new Piro::Epetra::NECoupledModelEvaluator(models, piroParams, - network_model, - coupledParams, globalComm)); - coupledModel->setOStream(rcp(&out,false)); - - // Setup solver - Piro::Epetra::SolverFactory solverFactory; - RCP coupledSolver = - solverFactory.createSolver(coupledParams, coupledModel); - - // Solve coupled system - EpetraExt::ModelEvaluator::InArgs inArgs = coupledSolver->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs outArgs = coupledSolver->createOutArgs(); - for (int i=0; iget_p_init(i)); - for (int i=0; i g = - rcp(new Epetra_Vector(*(coupledSolver->get_g_map(i)))); - outArgs.set_g(i, g); - } - coupledSolver->evalModel(inArgs, outArgs); - - // Regression tests - int failures = 0; - Teuchos::ParameterList& testParams = - coupledParams->sublist("Regression Tests"); - double relTol = testParams.get("Relative Tolerance", 1.0e-3); - double absTol = testParams.get("Absolute Tolerance", 1.0e-8); - - // Print results - for (int i=0; i g = outArgs.get_g(i); - if (g != Teuchos::null) { - out << "Response vector " << i << ":" << std::endl; - g->Print(out); - - // Test response - std::stringstream ss1; - ss1 << "Response " << i << " Test Values"; - bool testResponse = - testParams.isType< Teuchos::Array >(ss1.str()); - if (testResponse) { - Teuchos::Array testValues = - testParams.get >(ss1.str()); - failures += testResponses(*g, testValues, absTol, relTol, "Response", - out); - } - - } - } - - success = failures == 0; - Teuchos::VerboseObjectBase::setDefaultOStream(default_out); -} - -TEUCHOS_UNIT_TEST( Piro, SGCoupled ) -{ - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::ParameterList; - - RCP default_out = - Teuchos::VerboseObjectBase::getDefaultOStream(); - Teuchos::VerboseObjectBase::setDefaultOStream(rcp(&out,false)); - - // Create a communicator for Epetra objects - RCP globalComm; -#ifdef HAVE_MPI - globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); -#else - globalComm = rcp(new Epetra_SerialComm); -#endif - - std::string problem1_filename = "input_problem1_sg.xml"; - std::string problem2_filename = "input_problem2_sg.xml"; - std::string coupled_filename = "input_coupled_sg.xml"; - - // Setup stochastic coupled problem to get spatial comm's - RCP coupledParams = - Teuchos::getParametersFromXmlFile(coupled_filename); - setOStream(rcp(&out,false), *coupledParams); - RCP coupledSolver = - rcp(new Piro::Epetra::StokhosSolver(coupledParams, globalComm)); - RCP app_comm = coupledSolver->getSpatialComm(); - - // Setup problem 1 - RCP piroParams1 = - Teuchos::getParametersFromXmlFile(problem1_filename); - setOStream(rcp(&out,false), *piroParams1); - RCP model1 = rcp(new MockModelEval_D(app_comm)); - - // Setup problem 2 - RCP piroParams2 = - Teuchos::getParametersFromXmlFile(problem2_filename); - setOStream(rcp(&out,false), *piroParams2); - RCP model2 = rcp(new MockModelEval_D(app_comm)); - - // Setup coupled model - Teuchos::Array< RCP > models(2); - models[0] = model1; models[1] = model2; - Teuchos::Array< RCP > piroParams(2); - piroParams[0] = piroParams1; piroParams[1] = piroParams2; - RCP network_model = - rcp(new Piro::Epetra::ParamToResponseNetworkModel); - RCP coupledModel = - rcp(new Piro::Epetra::NECoupledModelEvaluator(models, piroParams, - network_model, - coupledParams, globalComm)); - coupledModel->setOStream(rcp(&out,false)); - - // Setup solver - coupledSolver->setup(coupledModel); - - Teuchos::RCP x_sg_init = - coupledSolver->get_x_sg_init(); - Teuchos::RCP x_sg_init_new = - Teuchos::rcp(new Stokhos::EpetraVectorOrthogPoly(*x_sg_init)); - Teuchos::RCP > basis = - coupledSolver->getBasis(); - for (int i=0; idimension(); i++) - (*x_sg_init_new)[i+1].PutScalar(1.0); - coupledSolver->set_x_sg_init(*x_sg_init_new); - - // Solve coupled system - EpetraExt::ModelEvaluator::InArgs inArgs = coupledSolver->createInArgs(); - EpetraExt::ModelEvaluator::OutArgs outArgs = coupledSolver->createOutArgs(); - for (int i=0; iget_p_sg_init(i)); - for (int i=0; i g_sg = - coupledSolver->create_g_sg(i); - outArgs.set_g_sg(i, g_sg); - } - coupledSolver->evalModel(inArgs, outArgs); - - // Regression tests - int failures = 0; - Teuchos::ParameterList& testParams = - coupledParams->sublist("Regression Tests"); - double relTol = testParams.get("Relative Tolerance", 1.0e-3); - double absTol = testParams.get("Absolute Tolerance", 1.0e-8); - - - // Print results - for (int i=0; i g_sg = - outArgs.get_g_sg(i); - if (g_sg != Teuchos::null) { - Epetra_Vector g_mean(*(coupledSolver->get_g_map(i))); - Epetra_Vector g_std_dev(*(coupledSolver->get_g_map(i))); - g_sg->computeMean(g_mean); - g_sg->computeStandardDeviation(g_std_dev); - out.precision(12); - out << "Response " << i << " Mean = " << std::endl - << g_mean << std::endl; - out << "Response " << i << " Std. Dev. = " << std::endl - << g_std_dev << std::endl; - out << "Response vector " << i << ":" << std::endl - << *(outArgs.get_g_sg(i)) << std::endl; - - // Test mean - std::stringstream ss1; - ss1 << "Response " << i << " Mean Test Values"; - bool testMean = - testParams.isType< Teuchos::Array >(ss1.str()); - if (testMean) { - Teuchos::Array testValues = - testParams.get >(ss1.str()); - failures += testResponses(g_mean, testValues, absTol, relTol, "Mean", - out); - } - - // Test std. dev. - std::stringstream ss2; - ss2 << "Response " << i << " Standard Deviation Test Values"; - bool testSD = - testParams.isType< Teuchos::Array >(ss2.str()); - if (testSD) { - Teuchos::Array testValues = - testParams.get >(ss2.str()); - failures += testResponses(g_std_dev, testValues, absTol, relTol, - "Standard Deviation", out); - } - - } - } - } - - success = failures == 0; - Teuchos::VerboseObjectBase::setDefaultOStream(default_out); -} -#endif #endif TEUCHOS_UNIT_TEST( Piro, Basic ) diff --git a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in index 9daaa2f69860..a537bf9648c0 100644 --- a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in +++ b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in @@ -25,6 +25,9 @@ /* Define if want to build with CHOLMOD enabled */ #cmakedefine TACHO_HAVE_SUITESPARSE +/* Define if want to build with TrilinosSS enabled */ +#cmakedefine TACHO_HAVE_TRILINOS_SS + /* Define if want to build with VTune enabled */ #cmakedefine TACHO_HAVE_VTUNE diff --git a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp index 450e04608954..613d7f25bd19 100644 --- a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp +++ b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp @@ -25,8 +25,11 @@ template int driver(int argc, char *argv[]) { std::string file = "test.mtx"; std::string graph_file = ""; std::string weight_file = ""; + int dofs_per_node = 1; + bool perturbPivot = false; int nrhs = 1; bool randomRHS = true; + bool onesRHS = false; std::string method_name = "chol"; int method = 1; // 1 - Chol, 2 - LDL, 3 - SymLU int small_problem_thres = 1024; @@ -47,6 +50,8 @@ template int driver(int argc, char *argv[]) { opts.set_option("file", "Input file (MatrixMarket SPD matrix)", &file); opts.set_option("graph", "Input condensed graph", &graph_file); opts.set_option("weight", "Input condensed graph weight", &weight_file); + opts.set_option("dofs-per-node", "# DoFs per node", &dofs_per_node); + opts.set_option("perturb", "Flag to perturb tiny pivots", &perturbPivot); opts.set_option("nrhs", "Number of RHS vectors", &nrhs); opts.set_option("method", "Solution method: chol, ldl, lu", &method_name); opts.set_option("small-problem-thres", "LAPACK is used smaller than this thres", &small_problem_thres); @@ -55,6 +60,7 @@ template int driver(int argc, char *argv[]) { opts.set_option("device-solve-thres", "Device function is used above this subproblem size", &device_solve_thres); opts.set_option("variant", "algorithm variant in levelset scheduling; 0, 1 and 2", &variant); opts.set_option("nstreams", "# of streams used in CUDA; on host, it is ignored", &nstreams); + opts.set_option("one-rhs", "Set RHS to be ones", &onesRHS); opts.set_option("no-warmup", "Flag to turn off warmup", &no_warmup); opts.set_option("nfacts", "# of factorizations to perform", &nfacts); opts.set_option("nsolves", "# of solves to perform", &nsolves); @@ -125,6 +131,8 @@ template int driver(int argc, char *argv[]) { if (!in.good()) { std::cout << "Failed in open the file: " << graph_file << std::endl; return -1; + } else if (verbose) { + std::cout << " > Condensed graph file: " << graph_file << std::endl; } in >> m_graph; @@ -135,8 +143,10 @@ template int driver(int argc, char *argv[]) { aj_graph = ordinal_type_array_host("aj", ap_graph(m_graph)); for (ordinal_type i = 0; i < m_graph; ++i) { const ordinal_type jbeg = ap_graph(i), jend = ap_graph(i + 1); - for (ordinal_type j = jbeg; j < jend; ++j) + for (ordinal_type j = jbeg; j < jend; ++j) { in >> aj_graph(j); + aj_graph(j) --; // base-one + } } } @@ -146,6 +156,8 @@ template int driver(int argc, char *argv[]) { if (!in.good()) { std::cout << "Failed in open the file: " << weight_file << std::endl; return -1; + } else if (verbose) { + std::cout << " > Weight file for condensed graph: " << weight_file << std::endl; } ordinal_type m(0); in >> m; @@ -160,17 +172,21 @@ template int driver(int argc, char *argv[]) { Tacho::Driver solver; /// common options - solver.setSolutionMethod(method); - solver.setSmallProblemThresholdsize(small_problem_thres); solver.setVerbose(verbose); + solver.setSolutionMethod(method); + solver.setLevelSetOptionAlgorithmVariant(variant); + solver.setLevelSetOptionNumStreams(nstreams); /// graph options solver.setOrderConnectedGraphSeparately(); /// levelset options + solver.setSmallProblemThresholdsize(small_problem_thres); solver.setLevelSetOptionDeviceFunctionThreshold(device_factor_thres, device_solve_thres); - solver.setLevelSetOptionAlgorithmVariant(variant); - solver.setLevelSetOptionNumStreams(nstreams); + if (perturbPivot) { + if (verbose) std::cout << " > perturb tiny pivots" << std::endl; + solver.useDefaultPivotTolerance(); + } auto values_on_device = Kokkos::create_mirror_view(typename device_type::memory_space(), A.Values()); Kokkos::deep_copy(values_on_device, A.Values()); @@ -178,7 +194,10 @@ template int driver(int argc, char *argv[]) { /// inputs are used for graph reordering and analysis if (m_graph > 0 && m_graph < A.NumRows()) solver.analyze(A.NumRows(), A.RowPtr(), A.Cols(), m_graph, ap_graph, aj_graph, aw_graph); - else + else if (dofs_per_node > 1) { + if (verbose) std::cout << " > DoFs / node = " << dofs_per_node << std::endl; + solver.analyze(A.NumRows(), dofs_per_node, A.RowPtr(), A.Cols()); + } else solver.analyze(A.NumRows(), A.RowPtr(), A.Cols()); /// create numeric tools and levelset tools @@ -202,7 +221,10 @@ template int driver(int argc, char *argv[]) { t("t", A.NumRows(), nrhs); // temp workspace (store permuted rhs) { - if (randomRHS) { + if (onesRHS) { + const value_type one(1.0); + Kokkos::deep_copy (b, one); + } else if (randomRHS) { Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(b, random, value_type(1)); } else { @@ -235,6 +257,7 @@ template int driver(int argc, char *argv[]) { std::cout << std::endl; std::cout << " Initi Time " << initi_time << std::endl; + std::cout << " > nnz = " << solver.getNumNonZerosU() << std::endl; std::cout << " Facto Time " << facto_time / (double)nfacts << std::endl; std::cout << " Solve Time " << solve_time / (double)nsolves << std::endl; std::cout << std::endl; diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp index 5f5278497a86..ed9b3e0ae693 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_CrsMatrixBase.hpp @@ -371,7 +371,8 @@ inline static void applyPermutationToCrsMatrixLower(/* */ CrsMatrixType &A, cons template inline double computeRelativeResidual(const CrsMatrixBase &A, const Kokkos::View &x, - const Kokkos::View &b) { + const Kokkos::View &b, + const bool verbose = false) { const bool test = (size_t(A.NumRows()) != size_t(A.NumCols()) || size_t(A.NumRows()) != size_t(b.extent(0)) || size_t(x.extent(0)) != size_t(b.extent(0)) || size_t(x.extent(1)) != size_t(b.extent(1))); if (test) @@ -405,6 +406,8 @@ inline double computeRelativeResidual(const CrsMatrixBase diff += arith_traits::real((h_b(i, p) - s) * arith_traits::conj(h_b(i, p) - s)); } } + if (verbose) + std::cout << " Relative residual norm = " << sqrt(diff) << " / " << sqrt(norm) << " = " << sqrt(diff/norm) << std::endl; return sqrt(diff / norm); } diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp index 274f4c952092..29cd41feeb38 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_Driver.hpp @@ -16,6 +16,7 @@ /// \author Kyungjoo Kim (kyukim@sandia.gov) #include "Tacho.hpp" +#include "Tacho_Util.hpp" #include #include @@ -24,7 +25,7 @@ namespace Tacho { /// forward decl class Graph; -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) class GraphTools_Metis; #else class GraphTools; @@ -42,6 +43,7 @@ template class NumericToolsLe template struct Driver { public: using value_type = ValueType; + using mag_type = typename ArithTraits::mag_type; using device_type = DeviceType; using exec_space = typename device_type::execution_space; using exec_memory_space = typename device_type::memory_space; @@ -63,7 +65,7 @@ template struct Driver { using crs_matrix_type = CrsMatrixBase; using crs_matrix_type_host = CrsMatrixBase; -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) using graph_tools_type = GraphTools_Metis; #else using graph_tools_type = GraphTools; @@ -111,6 +113,7 @@ template struct Driver { ordinal_type_array_host _h_peri_graph; // ** symbolic factorization output + ordinal_type _nnz_u; // supernodes output ordinal_type _nsupernodes; ordinal_type_array _supernodes; @@ -160,6 +163,8 @@ template struct Driver { ordinal_type _variant; // algorithmic variant in levelset 0: naive, 1: invert diagonals ordinal_type _nstreams; // on cuda, multi streams are used + mag_type _pivot_tol; // tolerance for tiny pivot perturbation + // parallelism and memory constraint is made via this parameter ordinal_type _max_num_superblocks; // # of superblocks in the memoyrpool @@ -206,9 +211,14 @@ template struct Driver { void setLevelSetOptionNumStreams(const ordinal_type nstreams); void setLevelSetOptionAlgorithmVariant(const ordinal_type variant); + void setPivotTolerance(const mag_type pivot_tol); + void useNoPivotTolerance(); + void useDefaultPivotTolerance(); + /// /// get interface /// + ordinal_type getNumNonZerosU() const; ordinal_type getNumSupernodes() const; ordinal_type_array getSupernodes() const; ordinal_type_array getPermutationVector() const; @@ -222,6 +232,7 @@ template struct Driver { template int analyze(const ordinal_type m, const arg_size_type_array &ap, const arg_ordinal_type_array &aj, const bool duplicate = false) { + _m = m; if (duplicate) { @@ -270,6 +281,7 @@ template struct Driver { const arg_perm_type_array &perm, const arg_perm_type_array &peri, const bool duplicate = false) { _m = m; + // this takes the user-specified perm, such that analyze() won't call graph partitioner if (duplicate) { /// for most cases, ap and aj are from host; so construct ap and aj and mirror to device _h_ap = size_type_array_host(Kokkos::ViewAllocateWithoutInitializing("h_ap"), ap.extent(0)); @@ -375,6 +387,46 @@ template struct Driver { return analyze(); } + template + int analyze(const ordinal_type m, const ordinal_type blk_size, + const arg_size_type_array &ap, const arg_ordinal_type_array &aj, + const bool duplicate = false) { + + if (blk_size > 1) { + //condense graph before calling analyze + const size_type nnz = ap(m); + ordinal_type m_graph = m / blk_size; + size_type nnz_graph = nnz / (blk_size*blk_size); + TACHO_TEST_FOR_EXCEPTION((m != blk_size * m_graph || nnz != size_type(blk_size*blk_size) * nnz_graph), + std::logic_error, "Failed to initialize the condensed graph"); + + size_type_array_host ap_graph + (Kokkos::ViewAllocateWithoutInitializing("ap_graph"), 1+m_graph); + ordinal_type_array_host aj_graph + (Kokkos::ViewAllocateWithoutInitializing("aj_graph"), nnz_graph); + ordinal_type_array_host aw_graph + (Kokkos::ViewAllocateWithoutInitializing("wgs"), m_graph); + // condense the graph + nnz_graph = 0; + ap_graph(0) = 0; + for (ordinal_type i = 0; i < m; i += blk_size) { + for (size_type k = ap(i); k < ap(i+1); k++) { + if (aj(k)%blk_size == 0) { + aj_graph(nnz_graph) = aj(k)/blk_size; + nnz_graph++; + } + aw_graph(i/blk_size) = blk_size; + ap_graph((i/blk_size)+1) = nnz_graph; + } + } + TACHO_TEST_FOR_EXCEPTION((nnz != size_type(blk_size*blk_size) * nnz_graph), + std::logic_error, "Failed to condense graph"); + return analyze(m, ap, aj, m_graph, ap_graph, aj_graph, aw_graph, duplicate); + } else { + return analyze(m, ap, aj, duplicate); + } + } + int initialize(); int factorize(const value_type_array &ax); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp index 70910b85d8b5..7245ae403d71 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Chol.hpp @@ -32,10 +32,10 @@ struct CholAlgorithm { }; struct CholAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp index bf5e720265ee..0fb38ab2f4cf 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp @@ -24,9 +24,9 @@ template Driver::Driver() : _method(1), _order_connected_graph_separately(1), _m(0), _nnz(0), _ap(), _h_ap(), _aj(), _h_aj(), _perm(), _h_perm(), _peri(), _h_peri(), _m_graph(0), _nnz_graph(0), _h_ap_graph(), _h_aj_graph(), _h_perm_graph(), - _h_peri_graph(), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1), + _h_peri_graph(), _nnz_u(0), _nsupernodes(0), _N(nullptr), _verbose(0), _small_problem_thres(1024), _serial_thres_size(-1), _mb(-1), _nb(-1), _front_update_mode(-1), _levelset(0), _device_level_cut(0), _device_factor_thres(128), - _device_solve_thres(128), _variant(2), _nstreams(16), _max_num_superblocks(-1) {} + _device_solve_thres(128), _variant(2), _nstreams(16), _pivot_tol(0.0), _max_num_superblocks(-1) {} /// /// duplicate the object @@ -157,9 +157,23 @@ template void Driver::setLevelSetOptionNumStr _nstreams = nstreams; } +template void Driver::setPivotTolerance(const mag_type pivot_tol) { + _pivot_tol = pivot_tol; +} + +template void Driver::useNoPivotTolerance() { + _pivot_tol = 0.0; +} + +template void Driver::useDefaultPivotTolerance() { + using arith_traits = ArithTraits; + _pivot_tol = sqrt(arith_traits::epsilon()); +} + /// /// get interface /// +template ordinal_type Driver::getNumNonZerosU() const { return _nnz_u; } template ordinal_type Driver::getNumSupernodes() const { return _nsupernodes; } template typename Driver::ordinal_type_array Driver::getSupernodes() const { @@ -179,11 +193,11 @@ typename Driver::ordinal_type_array Driver::getInversePermutatio // internal only template int Driver::analyze() { int r_val(0); - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { /// do nothing if (_verbose) { - printf("TachoSolver: Analyze\n"); - printf("====================\n"); + printf("TachoSolver: Analyze (Small Problem)\n"); + printf("====================================\n"); printf(" Linear system A\n"); printf(" number of equations: %10d\n", _m); printf("\n"); @@ -242,6 +256,7 @@ template int Driver::analyze_linear_system() symbolic_tools_type S(_m, _h_ap, _h_aj, _h_perm, _h_peri); S.symbolicFactorize(_verbose); + _nnz_u = S.NumNonzerosU(); _nsupernodes = S.NumSupernodes(); _stree_level = S.SupernodesTreeLevel(); _stree_roots = S.SupernodesTreeRoots(); @@ -287,6 +302,7 @@ template int Driver::analyze_condensed_graph( S.symbolicFactorize(_verbose); S.evaporateSymbolicFactors(_h_aw_graph, _verbose); + _nnz_u = S.NumNonzerosU(); _nsupernodes = S.NumSupernodes(); _stree_level = S.SupernodesTreeLevel(); _stree_roots = S.SupernodesTreeRoots(); @@ -330,7 +346,7 @@ template int Driver::initialize() { /// /// initialize numeric tools /// - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { /// do nothing } else { /// @@ -370,10 +386,10 @@ template int Driver::factorize(const value_ty } } - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { factorize_small_host(ax); } else { - _N->factorize(ax, _verbose); + _N->factorize(ax, _pivot_tol, _verbose); } return 0; } @@ -463,7 +479,7 @@ int Driver::solve(const value_type_matrix &x, const value_type_matrix &b } } - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { solve_small_host(x, b, t); } else { TACHO_TEST_FOR_EXCEPTION(t.extent(0) < x.extent(0) || t.extent(1) < x.extent(1), std::logic_error, @@ -541,7 +557,7 @@ double Driver::computeRelativeResidual(const value_type_array &ax, const CrsMatrixBase A; A.setExternalMatrix(_m, _m, _nnz, _ap, _aj, ax); - return Tacho::computeRelativeResidual(A, x, b); + return Tacho::computeRelativeResidual(A, x, b, _verbose); } template @@ -553,7 +569,7 @@ void Driver::computeSpMV(const value_type_array &ax, const value_type_ma } template int Driver::exportFactorsToCrsMatrix(crs_matrix_type &A) { - if (_m < _small_problem_thres) { + if (_m <= _small_problem_thres) { typedef ArithTraits ats; const typename ats::mag_type zero(0); @@ -631,6 +647,7 @@ template int Driver::release() { _h_perm_graph = ordinal_type_array_host(); _h_peri_graph = ordinal_type_array_host(); + _nnz_u = 0; _nsupernodes = 0; _supernodes = ordinal_type_array(); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp index 2886782fba4f..103906bde63d 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemm.hpp @@ -31,10 +31,10 @@ struct GemmAlgorithm { }; struct GemmAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp index 8193de22d610..0ccace7b4ffd 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Gemv.hpp @@ -31,10 +31,10 @@ struct GemvAlgorithm { }; struct GemvAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp index 9d48cd14fb96..a4a7c2948e46 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools.hpp @@ -67,6 +67,11 @@ class GraphTools { _perm(i) = i; _peri(i) = i; } + if (verbose) { + printf("Summary: GraphTools (Default)\n"); + printf("=============================\n"); + printf( " Use Natural Ordering\n\n" ); + } } ordinal_type_array PermVector() const { return _perm; } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp index a85ef651cc4a..a475f729f38a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.cpp @@ -13,7 +13,7 @@ #include "Tacho_Util.hpp" -#if defined(TACHO_HAVE_METIS) +#if defined(TACHO_HAVE_METIS) || defined(TACHO_HAVE_TRILINOS_SS) #include "Tacho_GraphTools_Metis.hpp" namespace Tacho { @@ -39,8 +39,15 @@ GraphTools_Metis::GraphTools_Metis(const Graph &g) { for (ordinal_type i = 0; i < static_cast(_adjncy.extent(0)); ++i) _adjncy(i) = g_col_idx(i); +#if defined(TACHO_HAVE_METIS) + _algo = 2; METIS_SetDefaultOptions(_options); _options[METIS_OPTION_NUMBERING] = 0; +#elif defined(TACHO_HAVE_TRILINOS_SS) + _algo = 1; +#else + _algo = 0; +#endif _perm_t = idx_t_array(do_not_initialize_tag("idx_t_perm"), _nvts); _peri_t = idx_t_array(do_not_initialize_tag("idx_t_peri"), _nvts); @@ -52,7 +59,12 @@ GraphTools_Metis::GraphTools_Metis(const Graph &g) { GraphTools_Metis::~GraphTools_Metis() {} void GraphTools_Metis::setVerbose(const bool verbose) { _verbose = verbose; } -void GraphTools_Metis::setOption(const int id, const idx_t value) { _options[id] = value; } +void GraphTools_Metis::setOption(const int id, const idx_t value) { +#if defined(TACHO_HAVE_METIS) + _options[id] = value; +#endif +} +void GraphTools_Metis::setAlgorithm(const int algo) { _algo = algo; } /// /// reorder by amd @@ -81,13 +93,12 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { Kokkos::Timer timer; double t_metis = 0; - int algo = 2; - if (algo == 0) { + if (_algo == 0) { for (ordinal_type i = 0; i < _nvts; ++i) { _perm(i) = i; _peri(i) = i; } - } else if (algo == 1) { + } else if (_algo == 1) { int ierr = 0; double amd_info[TRILINOS_AMD_INFO]; @@ -100,8 +111,10 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { _peri(_perm(i)) = i; } - TACHO_TEST_FOR_EXCEPTION(ierr != METIS_OK, std::runtime_error, "Failed in trilinos_amd"); + // ierr != TRILINOS_AMD_OK && ierr != TRILINOS_AMD_OK_BUT_JUMBLED + TACHO_TEST_FOR_EXCEPTION(ierr < TRILINOS_AMD_OK, std::runtime_error, "Failed in trilinos_amd"); } else { +#if defined(TACHO_HAVE_METIS) int ierr = 0; idx_t *xadj = (idx_t *)_xadj.data(); @@ -121,11 +134,19 @@ void GraphTools_Metis::reorder(const ordinal_type verbose) { } TACHO_TEST_FOR_EXCEPTION(ierr != METIS_OK, std::runtime_error, "Failed in METIS_NodeND"); +#else + TACHO_TEST_FOR_EXCEPTION(true, std::runtime_error, "METIS is not enabled"); +#endif } _is_ordered = true; if (verbose) { - printf("Summary: GraphTools (Metis)\n"); + if (_algo == 0) + printf("Summary: GraphTools (Natural)\n"); + else if (_algo == 1) + printf("Summary: GraphTools (AMD)\n"); + else + printf("Summary: GraphTools (Metis)\n"); printf("===========================\n"); switch (verbose) { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp index e3dd1856e601..87119b84de0f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_GraphTools_Metis.hpp @@ -16,11 +16,12 @@ #include "Tacho_Util.hpp" -#if defined(TACHO_HAVE_METIS) #include "Tacho_Graph.hpp" #include "trilinos_amd.h" -#include "metis.h" +#if defined(TACHO_HAVE_METIS) + #include "metis.h" +#endif namespace Tacho { @@ -28,6 +29,9 @@ class GraphTools_Metis { public: typedef typename UseThisDevice::type host_device_type; + #if !defined(TACHO_HAVE_METIS) + typedef ordinal_type idx_t; + #endif typedef Kokkos::View idx_t_array; typedef Kokkos::View ordinal_type_array; @@ -36,7 +40,10 @@ class GraphTools_Metis { idx_t _nvts; idx_t_array _xadj, _adjncy, _vwgt; + int _algo; + #if defined(TACHO_HAVE_METIS) idx_t _options[METIS_NOPTIONS]; + #endif // metis output idx_t_array _perm_t, _peri_t; @@ -61,6 +68,7 @@ class GraphTools_Metis { void setVerbose(const bool verbose); void setOption(const int id, const idx_t value); + void setAlgorithm(const int algo); template ordering_type amd_order(ordering_type n, const ordering_type *xadj, @@ -82,4 +90,3 @@ class GraphTools_Metis { } // namespace Tacho #endif -#endif diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp index 686eb93909fb..4498fb789921 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Herk.hpp @@ -31,10 +31,10 @@ struct HerkAlgorithm { }; struct HerkAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp index 87eb07915c69..19c9702a83ba 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LDL.hpp @@ -32,10 +32,10 @@ struct LDL_Algorithm { }; struct LDL_Algorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; } // namespace Tacho diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp index b1f4f33e1122..88cb90fe864c 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU.hpp @@ -32,10 +32,10 @@ struct LU_Algorithm { }; struct LU_Algorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp index 32f48d3abc48..6304a7c84ec3 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_External.hpp @@ -61,6 +61,12 @@ template <> struct LU { } } + template + KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const double /*tol*/, const ViewTypeA &A, const ViewTypeP &P) { + // tol is not used, for now + return invoke(member, A, P); + } + template inline static int modify(const ordinal_type m, const ViewTypeP &P) { static constexpr bool runOnHost = run_tacho_on_host_v; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp index b30c0c85c34f..26f4c4c202c6 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Internal.hpp @@ -25,7 +25,6 @@ template <> struct LU { template KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const ViewTypeA &A, const ViewTypeP &P) { typedef typename ViewTypeA::non_const_value_type value_type; - // typedef typename ViewTypeP::non_const_value_type p_value_type; static_assert(ViewTypeA::rank == 2, "A is not rank 2 view."); static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); @@ -41,6 +40,24 @@ template <> struct LU { return r_val; } + template + KOKKOS_INLINE_FUNCTION static int invoke(MemberType &member, const double tol, const ViewTypeA &A, const ViewTypeP &P) { + typedef typename ViewTypeA::non_const_value_type value_type; + + static_assert(ViewTypeA::rank == 2, "A is not rank 2 view."); + static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); + + TACHO_TEST_FOR_ABORT(P.extent(0) < 4 * A.extent(0), "P should be 4*A.extent(0) ."); + + int r_val(0); + const ordinal_type m = A.extent(0), n = A.extent(1); + if (m > 0 && n > 0) { + /// factorize LU + LapackTeam::getrf(member, tol, m, n, A.data(), A.stride_1(), P.data(), &r_val); + } + return r_val; + } + template KOKKOS_INLINE_FUNCTION static int modify(const MemberType &member, const ordinal_type m, const ViewTypeP &P) { static_assert(ViewTypeP::rank == 1, "P is not rank 1 view."); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp index b0fa4c8d3885..e177dfa7c9c7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_LU_Serial.hpp @@ -61,6 +61,12 @@ template <> struct LU { } } + template + inline static int invoke(MemberType &member, const double /*tol*/, const ViewTypeA &A, const ViewTypeP &P) { + // tol is not used, for now + return invoke(member, A, P); + } + template inline static int modify(const ordinal_type m, const ViewTypeP &P) { static constexpr bool runOnHost = run_tacho_on_host_v; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp index cde52b82693a..f7308a444b94 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Lapack_Team.hpp @@ -231,13 +231,13 @@ template struct LapackTeam { template static KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const int m, const int n, T *KOKKOS_RESTRICT A, const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { + *info = 0; if (m <= 0 || n <= 0) return; using arith_traits = ArithTraits; using mag_type = typename arith_traits::mag_type; - - const T zero(0); + const mag_type zero(0); const int as0 = 1; for (int p = 0; p < m; ++p) { const int iend = m - p - 1, jend = n - p - 1; @@ -248,27 +248,29 @@ template struct LapackTeam { *KOKKOS_RESTRICT a12 = A + (p) * as0 + (p + 1) * as1, *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; + int idx(0); + mag_type val(0.0); { - int idx(0); using reducer_value_type = typename Kokkos::MaxLoc::value_type; reducer_value_type value; Kokkos::MaxLoc reducer_value(value); Kokkos::parallel_reduce( Kokkos::TeamVectorRange(member, 1 + iend), [&](const int &i, reducer_value_type &update) { - const mag_type val = arith_traits::abs(ABR[i * as0]); - if (val > update.val) { - update.val = val; + const mag_type val_i = arith_traits::abs(ABR[i * as0]); + if (val_i > update.val) { + update.val = val_i; update.loc = i; } }, reducer_value); member.team_barrier(); idx = value.loc; + val = value.val; /// pivot Kokkos::single(Kokkos::PerThread(member), [&]() { - if (*info == 0 && *alpha11 == zero) { + if (*info == 0 && val == zero) { *info = 1+p; } ipiv[p] = p + idx + 1; @@ -279,9 +281,74 @@ template struct LapackTeam { member.team_barrier(); } } - + const T alpha = *alpha11; // swapped, so contains new pivot + if(val != zero) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); + member.team_barrier(); + } + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, jend), [&](const int &j) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(member, iend), + [&](const int &i) { A22[i * as0 + j * as1] -= a21[i * as0] * a12[j * as1]; }); + }); member.team_barrier(); - const T alpha = *alpha11; + } + } + + template + static KOKKOS_INLINE_FUNCTION void getrf(const MemberType &member, const double tol, const int m, const int n, T *KOKKOS_RESTRICT A, + const int as1, int *KOKKOS_RESTRICT ipiv, int *info) { + *info = 0; + if (m <= 0 || n <= 0) + return; + + using arith_traits = ArithTraits; + using mag_type = typename arith_traits::mag_type; + const mag_type zero(0); + //const mag_type tol = sqrt(arith_traits::epsilon()); + const int as0 = 1; + for (int p = 0; p < m; ++p) { + const int iend = m - p - 1, jend = n - p - 1; + T *KOKKOS_RESTRICT alpha11 = A + (p)*as0 + (p)*as1, // as0 & as1 are leading dimension for rows & cols + *KOKKOS_RESTRICT AB = A + (p) * as0, + *KOKKOS_RESTRICT ABR = alpha11, + *KOKKOS_RESTRICT a21 = A + (p + 1) * as0 + (p) * as1, + *KOKKOS_RESTRICT a12 = A + (p) * as0 + (p + 1) * as1, + *KOKKOS_RESTRICT A22 = A + (p + 1) * as0 + (p + 1) * as1; + + int idx(0); + mag_type val(0.0); + { + using reducer_value_type = typename Kokkos::MaxLoc::value_type; + reducer_value_type value; + Kokkos::MaxLoc reducer_value(value); + Kokkos::parallel_reduce( + Kokkos::TeamVectorRange(member, 1 + iend), + [&](const int &i, reducer_value_type &update) { + const mag_type val_i = arith_traits::abs(ABR[i * as0]); + if (val_i > update.val) { + update.val = val_i; + update.loc = i; + } + }, + reducer_value); + member.team_barrier(); + idx = value.loc; + val = value.val; + + /// pivot + Kokkos::single(Kokkos::PerThread(member), [&]() { + if (val < tol) { + ABR[idx * as0] = (arith_traits::real(ABR[idx * as0]) < zero ? -T(tol) : T(tol)); + } + ipiv[p] = p + idx + 1; + }); + if (idx) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, n), + [&](const int &j) { swap(AB[j * as1], AB[idx * as0 + j * as1]); }); + member.team_barrier(); + } + } + const T alpha = *alpha11; // swapped, so contains new pivot Kokkos::parallel_for(Kokkos::TeamVectorRange(member, iend), [&](const int &i) { a21[i * as0] /= alpha; }); member.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(member, jend), [&](const int &j) { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp index 312c2bfcefd9..5430e789a462 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Base.hpp @@ -24,6 +24,7 @@ namespace Tacho { template class NumericToolsBase { public: using value_type = ValueType; + using mag_type = typename ArithTraits::mag_type; using device_type = DeviceType; using exec_space = typename device_type::execution_space; using exec_memory_space = typename device_type::memory_space; @@ -243,7 +244,7 @@ template class NumericToolsBase { } } - inline virtual void factorize(const value_type_array &ax, const ordinal_type verbose = 0) { + inline virtual void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "The function should be overriden by derived classes"); } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 18897036922a..25068de037d7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -88,6 +88,7 @@ #endif #endif + namespace Tacho { template @@ -112,6 +113,7 @@ class NumericToolsLevelSet : public NumericToolsBase { using typename base_type::supernode_info_type; using typename base_type::supernode_type_array_host; using typename base_type::value_type; + using typename base_type::mag_type; using typename base_type::int_type_array; using typename base_type::value_type_array; using typename base_type::value_type_matrix; @@ -408,7 +410,7 @@ class NumericToolsLevelSet : public NumericToolsBase { /// initialization / release /// inline void initialize(const ordinal_type device_level_cut, const ordinal_type device_factorize_thres, - const ordinal_type device_solve_thres, const int nstreams = 1, const ordinal_type verbose = 0) { + const ordinal_type device_solve_thres, const int nstreams_in = 1, const ordinal_type verbose = 0) { stat_level.n_device_factorize = 0; stat_level.n_device_solve = 0; stat_level.n_team_factorize = 0; @@ -417,6 +419,8 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::Timer timer; timer.reset(); + // # of streams needs to be at least 1 + const int nstreams = max(1, nstreams_in); /// /// level data structure @@ -790,6 +794,8 @@ class NumericToolsLevelSet : public NumericToolsBase { } inline void createStream(const ordinal_type nstreams, const ordinal_type verbose = 0) { + // # of streams needs to be at least 1 + if (nstreams <= 0) return; #if defined(KOKKOS_ENABLE_CUDA) _nstreams = nstreams; if (_streams.size() == size_t(nstreams)) return; @@ -2280,7 +2286,12 @@ class NumericToolsLevelSet : public NumericToolsBase { if (verbose) { printf("Summary: LevelSetTools-Variant-%d (CholeskyFactorize)\n", variant); printf("=====================================================\n"); - printf( "\n ** Team = %f s, Device = %f s, Update = %f s **\n\n",time_parallel,time_device,time_update ); + printf( "\n ** Team = %f s, Device = %f s, Update = %f s **\n",time_parallel,time_device,time_update ); + if (variant == 3) { + printf( " extractCRS with total nnzL = %ld and nnzU = %ld\n\n",colindL.extent(0),colindU.extent(0) ); + } else { + printf( "\n" ); + } print_stat_factor(); fflush(stdout); } @@ -3669,7 +3680,7 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const ordinal_type &i) { buf_solve_nrhs_ptr(i) = nrhs * buf_solve_ptr(i); }); Kokkos::deep_copy(_h_buf_solve_nrhs_ptr, _buf_solve_nrhs_ptr); - _nrhs = nrhs; + _nrhs = nrhs; } } } @@ -4204,7 +4215,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void factorizeLU(const value_type_array &ax, const ordinal_type verbose) { + inline void factorizeLU(const value_type_array &ax, const mag_type pivot_tol, const ordinal_type verbose) { constexpr bool is_host = std::is_same::value; Kokkos::Timer timer; Kokkos::Timer tick; @@ -4278,7 +4289,9 @@ class NumericToolsLevelSet : public NumericToolsBase { team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, &rval); - + if (pivot_tol > 0.0) { + functor.setDiagPertubationTol(pivot_tol); + } // get max vector length const ordinal_type vmax = policy_factor.vector_length_max(); { @@ -4333,7 +4346,9 @@ class NumericToolsLevelSet : public NumericToolsBase { if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (team) returns non-zero error code."); } - + if (_status != 0) { + TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); + } Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { Kokkos::fence(); time_update += tick.seconds(); @@ -4358,7 +4373,12 @@ class NumericToolsLevelSet : public NumericToolsBase { if (verbose) { printf("Summary: LevelSetTools-Variant-%d (LU Factorize)\n", variant); printf("================================================\n"); - printf( "\n ** Team = %f s, Device = %f s, Update = %f s (%d streams) **\n\n",time_parallel,time_device,time_update,_nstreams ); + printf( "\n ** Team = %f s, Device = %f s, Update = %f s (%d streams) **\n",time_parallel,time_device,time_update,_nstreams ); + if (variant == 3) { + printf( " extractCRS with total nnzL = %ld and nnzU = %ld\n\n",colindL.extent(0),colindU.extent(0) ); + } else { + printf( "\n" ); + } print_stat_factor(); fflush(stdout); } @@ -4564,7 +4584,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void factorize(const value_type_array &ax, const ordinal_type verbose = 0) override { + inline void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) override { Kokkos::deep_copy(_superpanel_buf, value_type(0)); switch (this->getSolutionMethod()) { case 1: { /// Cholesky @@ -4600,7 +4620,7 @@ class NumericToolsLevelSet : public NumericToolsBase { track_alloc(_piv.span() * sizeof(ordinal_type)); } } - factorizeLU(ax, verbose); + factorizeLU(ax, pivot_tol, verbose); break; } default: { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp index 584930b56525..86b65e7ef78f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Serial.hpp @@ -45,6 +45,7 @@ class NumericToolsSerial : public NumericToolsBase { using typename base_type::ordinal_type_array; using typename base_type::ordinal_type_array_host; using typename base_type::size_type_array; + using typename base_type::mag_type; using typename base_type::value_type; using typename base_type::value_type_array; using typename base_type::value_type_matrix; @@ -475,7 +476,7 @@ class NumericToolsSerial : public NumericToolsBase { /// /// main interface /// - inline void factorize(const value_type_array &ax, const ordinal_type verbose = 0) override { + inline void factorize(const value_type_array &ax, const mag_type pivot_tol = 0.0, const ordinal_type verbose = 0) override { { const bool test = !std::is_same::value; TACHO_TEST_FOR_EXCEPTION(test, std::logic_error, "Serial interface works on host device only"); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp index f8e30826a1dc..6f687b8ba1a7 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.cpp @@ -665,6 +665,7 @@ SymbolicTools::SymbolicTools(const ordinal_type m, const size_type_array &ap, co const ordinal_type_array &perm, const ordinal_type_array &peri) : _m(m), _ap(ap), _aj(aj), _perm(perm), _peri(peri) {} +ordinal_type SymbolicTools::NumNonzerosU() const { return stat.nnz_u; } ordinal_type SymbolicTools::NumSupernodes() const { return _supernodes.extent(0) - 1; } ordinal_type_array SymbolicTools::Supernodes() const { return _supernodes; } size_type_array SymbolicTools::gidSuperPanelPtr() const { return _gid_super_panel_ptr; } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp index b9a7d8b5236c..2c08e42a95d2 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SymbolicTools.hpp @@ -169,6 +169,7 @@ class SymbolicTools { Kokkos::deep_copy(_peri, G.InvPermVector()); } + ordinal_type NumNonzerosU() const; ordinal_type NumSupernodes() const; ordinal_type_array Supernodes() const; size_type_array gidSuperPanelPtr() const; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp index 3970b8f9c213..5728d40b5f2f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp @@ -192,7 +192,7 @@ template struct TeamFunctor_ExtractCrs { [&](const int& i) { // diagonal block ordinal_type j; - for (ordinal_type j = i; j < s.m; j++) { + for (j = i; j < s.m; j++) { if (AT(i,j) != zero) { int nnz = _rowptr[i+offm]; _colind[nnz] = j+offm; @@ -202,8 +202,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonal blocksa j = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AT(i,j) != zero) { int nnz = _rowptr[i+offm]; _colind[nnz] = _info.gid_colidx(k+offn); @@ -262,8 +262,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonals (each thread extract col, needing atomic-add) ordinal_type i = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AL(i, j) != zero) { ordinal_type gid_i = _info.gid_colidx(k+offn); Kokkos::atomic_add(&(_rowptr[1+gid_i]), 1); @@ -329,8 +329,8 @@ template struct TeamFunctor_ExtractCrs { } // off-diagonals (each thread extract col, needing atomic-add) ordinal_type i = s.m; - for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { - for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + for (ordinal_type blk_id = s.sid_col_begin + 1; blk_id < s.sid_col_end - 1; blk_id++) { + for (ordinal_type k = _info.sid_block_colidx(blk_id).second; k < _info.sid_block_colidx(blk_id + 1).second; k++) { if (AL(i, j) != zero) { ordinal_type gid_i = _info.gid_colidx(k+offn); ordinal_type nnz = Kokkos::atomic_fetch_add(&(_rowptr[gid_i]), 1); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp index 3ad435b8e853..33caa7532fb0 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp @@ -34,6 +34,9 @@ template struct TeamFunctor_FactorizeLU { using value_type_array = typename supernode_info_type::value_type_array; using value_type_matrix = typename supernode_info_type::value_type_matrix; + using arith_traits = ArithTraits; + using mag_type = typename arith_traits::mag_type; + private: supernode_info_type _info; ordinal_type_array _compute_mode, _level_sids; @@ -44,6 +47,7 @@ template struct TeamFunctor_FactorizeLU { size_type_array _buf_ptr; value_type_array _buf; + mag_type _tol; int *_rval; public: @@ -54,7 +58,8 @@ template struct TeamFunctor_FactorizeLU { TeamFunctor_FactorizeLU(const supernode_info_type &info, const ordinal_type_array &compute_mode, const ordinal_type_array &level_sids, const ordinal_type_array &piv, const value_type_array buf, int *rval) - : _info(info), _compute_mode(compute_mode), _level_sids(level_sids), _piv(piv), _buf(buf), _rval(rval) {} + : _info(info), _compute_mode(compute_mode), _level_sids(level_sids), _piv(piv), _buf(buf), + _tol(0.0), _rval(rval) {} inline void setRange(const ordinal_type pbeg, const ordinal_type pend) { _pbeg = pbeg; @@ -62,6 +67,7 @@ template struct TeamFunctor_FactorizeLU { } inline void setBufferPtr(const size_type_array &buf_ptr) { _buf_ptr = buf_ptr; } + inline void setDiagPertubationTol(const mag_type tol) { _tol = tol; } /// /// Main functions @@ -78,7 +84,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); @@ -117,7 +126,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); @@ -178,7 +190,10 @@ template struct TeamFunctor_FactorizeLU { if (m > 0) { UnmanagedViewType AT(s.u_buf, m, n); - err = LU::invoke(member, AT, P); + if (_tol > 0.0) + err = LU::invoke(member, _tol, AT, P); + else + err = LU::invoke(member, AT, P); member.team_barrier(); if (err != 0) { Kokkos::atomic_add(_rval, 1); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp index 7475de0db6ab..2865c2ebd55c 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsm.hpp @@ -30,10 +30,10 @@ struct TrsmAlgorithm { }; struct TrsmAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp index dd3eee93269c..20631b350e12 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Trsv.hpp @@ -30,10 +30,10 @@ struct TrsvAlgorithm { }; struct TrsvAlgorithm_Team { -#if defined(KOKKOS_ENABLE_OPENMP) - using type = ActiveHostAlgorithm::type; -#else +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) using type = ActiveAlgorithm::type; +#else + using type = ActiveHostAlgorithm::type; #endif }; diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 514f7e831a1a..f74c63a19424 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG +5.23.2 (STK_VERSION 5230200) 12/11/2024 + misc fixes for AMD/ROCm (ATS-4) + stk_mesh: speedup for device-field multi-state rotation + reduce stacksize (sizeof(DeviceMesh)) from ~2900 to ~470 + stk_search: misc fixes + stk_io: add query for existence of fields on database + 5.21.6-1 (STK_VERSION 5210601) 10/31/2024 stk_mesh, stk_search: more fixes for HIP unified and Cuda no-uvm builds diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp index 7bcbd60764f4..d45a371d6576 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp @@ -372,13 +372,12 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) typedef stk::ngp::TeamPolicy::member_type TeamHandleType; const auto& teamPolicy = stk::ngp::TeamPolicy(ngpMesh.num_buckets(stk::topology::ELEM_RANK), Kokkos::AUTO); - Kokkos::parallel_for(teamPolicy, KOKKOS_LAMBDA(const TeamHandleType& team) { const stk::mesh::NgpMesh::BucketType& bucket = ngpMesh.get_bucket(stk::topology::ELEM_RANK, team.league_rank()); - unsigned numElems = bucket.size(); + const unsigned numElems = bucket.size(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElems), [&] (const int& i) { @@ -409,9 +408,8 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) TEST_F(NgpHowTo, loopOverElemFaces) { - if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { - GTEST_SKIP(); - } + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); diff --git a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp index 3482ca2468c2..ba891a31e780 100644 --- a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp +++ b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp @@ -71,10 +71,10 @@ TEST(stk_topology, shell_side_topology) { EXPECT_EQ(shell.num_sides(),6u); EXPECT_EQ(shell.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(shell.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(shell.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(shell.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(5), stk::topology::LINE_2); } //end_shell_side_topo diff --git a/packages/stk/stk_expreval/stk_expreval/Node.cpp b/packages/stk/stk_expreval/stk_expreval/Node.cpp index 83916c8c7c9c..8a5ac920c8f1 100644 --- a/packages/stk/stk_expreval/stk_expreval/Node.cpp +++ b/packages/stk/stk_expreval/stk_expreval/Node.cpp @@ -82,7 +82,11 @@ double& Node::setResult() { void Node::eval() { - stk::util::clear_fp_errors(); + if (m_owner->get_fp_error_behavior() != Eval::FPErrorBehavior::Ignore) + { + stk::util::clear_fp_errors(); + } + switch (m_opcode) { case OPCODE_STATEMENT: { setResult() = m_left->getResult(); diff --git a/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp new file mode 100644 index 000000000000..d0d9c408ea6f --- /dev/null +++ b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp @@ -0,0 +1,73 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkSearch, NGP_coarse_search_mesh_elem_boxes_MORTON) +{ + using ExecSpace = Kokkos::DefaultExecutionSpace; + MPI_Comm comm = MPI_COMM_WORLD; + if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } + + stk::mesh::MeshBuilder builder(comm); + std::shared_ptr bulkPtr = builder.create(); + + stk::io::fill_mesh("generated:1x9x19|sideset:xXyYzZ", *bulkPtr); + + Kokkos::View elemBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + Kokkos::View faceBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK); + + std::cout<<"Num elem-boxes: "< searchResults; + stk::search::local_coarse_search(elemBoxes, faceBoxes, searchMethod, searchResults, ExecSpace{}); + + const size_t expectedSize = 2910; + EXPECT_EQ(expectedSize, searchResults.size())<<"expected results size: "< +#include #include // for exception #include // for copy, sort, max, find #include // for fmod @@ -80,34 +81,6 @@ namespace { bool meshFieldSort(const stk::io::MeshField& f1, const stk::io::MeshField &f2) { return f1.field()->mesh_meta_data_ordinal() < f2.field()->mesh_meta_data_ordinal(); } - - void add_missing_fields(std::vector *missingFields, - std::map missing_fields_collector) { - if (missingFields) - { - std::vector discoveredMissingFields; - for (auto missingStatedFieldIter : missing_fields_collector) - { - discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, - missingStatedFieldIter.second->db_name())); - } - std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), - [](const stk::io::MeshField &a, const stk::io::MeshField &b) { - return (a.db_name() < b.db_name()) - || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); - - for(stk::io::MeshField &missingField : *missingFields) - { - std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); - if(iter != discoveredMissingFields.end()) - { - discoveredMissingFields.erase(iter); - } - } - - missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); - } - } } namespace stk { @@ -156,11 +129,11 @@ namespace io { m_stopTime(std::numeric_limits::max()), m_periodType(CYCLIC), m_fieldsInitialized(false), - m_haveCachedEntityList(false), + m_haveCachedEntityList(false), m_multiStateSuffixes(nullptr) { STK_ThrowErrorMsgIf(m_database == nullptr || !m_database->ok(true), - "ERROR: Invalid Ioss region detected in add_mesh_database"); + "ERROR: Invalid Ioss region detected in add_mesh_database"); Ioss::DatabaseUsage db_usage = m_database->usage(); if (db_usage == Ioss::READ_RESTART) { @@ -178,8 +151,8 @@ namespace io { } STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } @@ -202,8 +175,8 @@ namespace io { m_region = std::shared_ptr(region); STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } } @@ -220,7 +193,7 @@ namespace io { if (!fieldAlreadyExists) { m_fields.push_back(mesh_field); stk::io::set_field_role(*mesh_field.field(), Ioss::Field::TRANSIENT); - m_fieldsInitialized = false; + m_fieldsInitialized = false; } } @@ -278,58 +251,9 @@ namespace io { for (size_t i=0; i < fields.size(); i++) { const Ioss::Field::RoleType* role = stk::io::get_field_role(*fields[i]); if ( role && *role == Ioss::Field::TRANSIENT ) { - add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); - } - } - } - - void InputFile::build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf) - { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *entity = region->get_entity(part->name()); - - if (entity != nullptr) { - if (f->entity_rank() == rank) { - build_field_part_associations(mf, *part, rank, entity); - process_fields_for_grouping_entity(mf, *part, entity); - - if(entity->type() == Ioss::SIDESET) { - auto io_side_set = dynamic_cast(entity); - STK_ThrowRequire(io_side_set != nullptr); - auto fbs = io_side_set->get_side_blocks(); - - for(auto& io_fblock : fbs) { - build_field_part_associations(mf, *part, rank, io_fblock); - process_fields_for_grouping_entity(mf, *part, io_fblock); - } - } - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - - node_entity = region->get_entity(nodes_name); - - if (node_entity == nullptr) { - nodes_name = part->name() + "_n"; - node_entity = region->get_entity(nodes_name); - } - - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - build_field_part_associations(mf, *part, stk::topology::NODE_RANK, node_entity); - process_fields_for_grouping_entity(mf, *part, node_entity); - } - } + add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); } + } } bool InputFile::read_input_field(stk::io::MeshField &mf, stk::mesh::BulkData &bulk) @@ -356,14 +280,15 @@ namespace io { "ERROR: Input database '" << region->get_database()->get_filename() << "' has no transient data."); + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + const stk::mesh::FieldBase *f = mf.field(); - std::vector::iterator P = mf.m_subsetParts.begin(); - while (P != mf.m_subsetParts.end()) { + + for (const stk::mesh::Part* part : mf.m_subsetParts) { // Find the Ioss::GroupingEntity corresponding to this part... mf.set_inactive(); - const stk::mesh::Part *part = *P; ++P; - build_field_part_associations_for_part(region, f, part, mf); + iq.build_field_part_associations_for_part(mf, part); if (mf.is_active()) { mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); @@ -371,35 +296,30 @@ namespace io { } if(mf.m_subsetParts.empty()) { - mf.set_inactive(); - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - if (f->entity_rank() == stk::topology::NODE_RANK) { - build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0]); - process_fields_for_grouping_entity(mf, meta.universal_part(), region->get_node_blocks()[0]); - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - const stk::mesh::Part * part = *ip; + mf.set_inactive(); + // Now handle the non-subsetted fields... - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - build_field_part_associations_for_part(region, f, part, mf); - } + // Check universal_part() NODE_RANK first... + const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::NodeBlock* nb = region->get_node_blocks()[0]; + iq.build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, nb); + iq.process_fields_for_grouping_entity(mf, meta.universal_part(), nb); } + } - if (mf.is_active()) { - mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + // Now handle all non-nodeblock parts... + for ( const stk::mesh::Part * part : meta.get_parts() ) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + iq.build_field_part_associations_for_part(mf, part); } + } + + if (mf.is_active()) { + mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + } } return mf.is_active(); @@ -432,318 +352,6 @@ namespace io { return read_defined_input_fields(state_time, missingFields, bulk); } - bool InputFile::build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector) - { - bool field_is_missing = false; - stk::mesh::FieldBase *f = mesh_field.field(); - // Only add TRANSIENT Fields -- check role; if not present assume transient... - const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); - if (role == nullptr || *role == Ioss::Field::TRANSIENT) { - if (stk::io::is_field_on_part(f, rank, part)) { - const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); - FieldType field_type; - stk::io::get_io_field_type(f, res, &field_type); - if (field_type.type != Ioss::Field::INVALID) { - - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - field_is_missing = true; - if (missing_fields_collector) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector)[f->field_state(missing_state)] = &mesh_field; - } - } - } - - bool field_exists = io_entity->field_exists(db_name); - if (!field_exists) { - field_is_missing = true; - if (missing_fields_collector) { - (*missing_fields_collector)[f] = &mesh_field; - } - } - - // See if field with that name exists on io_entity... - if (field_exists) { - mesh_field.add_part(rank, part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? false : true); - mesh_field.set_active(); - } - } - } - } - return field_is_missing; - } - - bool InputFile::process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr) - { - STK_ThrowRequireMsg(io_entity != nullptr, "Null IO entity"); - - bool doesFieldExist = false; - - stk::mesh::FieldBase *f = mesh_field.field(); - - stk::mesh::EntityRank rank = part_primary_entity_rank(part); - if(f->entity_rank() == rank) { - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - if (missing_fields_collector_ptr) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector_ptr)[f->field_state(missing_state)] = &mesh_field; - } - } else { - doesFieldExist = true; - } - } - - if(doesFieldExist == false) { - doesFieldExist = io_entity->field_exists(db_name); - if (!doesFieldExist) { - if (missing_fields_collector_ptr) { - (*missing_fields_collector_ptr)[f] = &mesh_field; - } - } - } - - // See if field with that name exists on io_entity... - if (doesFieldExist) { - mesh_field.add_part(f->entity_rank(), part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? false : true); - mesh_field.set_active(); - } - } - - return doesFieldExist; - } - - void InputFile::build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - - for (auto &mesh_field : m_fields) - { - if(mesh_field.is_active()) { - continue; - } - - std::map missingFieldCollector; - bool doesFieldExist = false; - stk::mesh::Part &universalPart = meta.universal_part(); - Ioss::GroupingEntity * universalNodeEntity = region->get_entity("nodeblock_1"); - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, universalPart, universalNodeEntity, &missingFieldCollector); - - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - - if(io_entity == nullptr) { - continue; - } - - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_entity, &missingFieldCollector); - - if(io_entity->type() == Ioss::SIDEBLOCK || io_entity->type() == Ioss::SIDESET) - { - static const std::string s_nodeset_suffix("_n"); - - std::string ns_name = part->name(); - ns_name += s_nodeset_suffix; - Ioss::NodeSet *io_node_set = region->get_nodeset(ns_name); - if(io_node_set != nullptr) { - // Process hidden nodesets - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_node_set, &missingFieldCollector); - } - } - - if(io_entity->type() == Ioss::SIDESET) - { - Ioss::SideSet* sideSet = dynamic_cast(io_entity); - auto faceBlocks = sideSet->get_side_blocks(); - for (auto faceBlock : faceBlocks) - { - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, faceBlock, &missingFieldCollector); - } - - } - } - } - - if (!doesFieldExist) - { - num_missing_fields += missingFieldCollector.size(); - if (nullptr != missingFields) - { - add_missing_fields(missingFields, missingFieldCollector); - } - else { - for (auto missingField : missingFieldCollector) { - std::cout << "Missing field: " << missingField.second->db_name() << std::endl; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - } - - void InputFile::build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - std::map missing_fields_collector; - std::map *missing_fields_collector_ptr = - (missingFields ? &missing_fields_collector : 0); - - // Each input field will have a list of the Parts that the field exists on... - // Create this list. - Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - // First handle any fields that are subsetted (restricted to a specified list of parts) - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - const stk::mesh::FieldBase *f = (*I).field(); - std::vector::iterator P = (*I).m_subsetParts.begin(); - while (P != (*I).m_subsetParts.end()) { - // Find the Ioss::GroupingEntity corresponding to this part... - const stk::mesh::Part *part = *P; ++P; - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - STK_ThrowErrorMsgIf( io_entity == nullptr, - "ERROR: For field '" << (*I).field()->name() - << "' Could not find database entity corresponding to the part named '" - << part->name() << "'."); - field_is_missing = build_field_part_associations(*I, *part, rank, io_entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - if (f->entity_rank() == stk::topology::NODE_RANK) { - bool field_is_missing = build_field_part_associations(*I, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0], missing_fields_collector_ptr); - if (field_is_missing) { - ++num_missing_fields; - } - } - } - ++I; - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *entity = region->get_entity(part->name()); - if (entity != nullptr && !m_fields.empty() && entity->type() != Ioss::SIDESET) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - field_is_missing = build_field_part_associations(*I, *part, rank, entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - - add_missing_fields(missingFields, missing_fields_collector); - } - double InputFile::map_analysis_to_db_time(double time) const { double db_time = time; @@ -773,15 +381,16 @@ namespace io { std::sort(m_fields.begin(), m_fields.end(), meshFieldSort); bool ignore_missing_fields = (missingFields != nullptr); + Ioss::Region *region = m_region.get(); if (!m_fieldsInitialized) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - (*I).set_inactive(); ++I; - } + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); - build_field_part_associations(bulk, missingFields); - build_field_part_associations_from_grouping_entity(bulk, missingFields); + for (stk::io::MeshField& mf : m_fields) { + mf.set_inactive(); + iq.build_field_part_associations(mf, missingFields); + iq.build_field_part_associations_from_grouping_entity(mf, missingFields); + } m_fieldsInitialized = true; } @@ -796,7 +405,6 @@ namespace io { STK_ThrowErrorMsgIf (m_region.get() == nullptr, "ERROR: There is no Input mesh/restart region associated with this Mesh Data."); - Ioss::Region *region = m_region.get(); // Get struct containing interval of database time(s) containing 'time' DBStepTimeInterval sti(region, db_time); @@ -859,13 +467,15 @@ namespace io { bool ignore_missing_fields = (missingFields != nullptr); + if (!m_fieldsInitialized) { + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + for (auto & meshField : m_fields) { meshField.set_inactive(); + iq.build_field_part_associations(meshField, missingFields); } - build_field_part_associations(bulk, missingFields); - m_fieldsInitialized = true; } @@ -903,5 +513,14 @@ namespace io { return time_read; } + void InputFile::initialize_input_fields() + { + for (auto & meshField : m_fields) { + meshField.set_inactive(); + meshField.clear_field_parts(); + } + + m_fieldsInitialized = false; + } } } diff --git a/packages/stk/stk_io/stk_io/InputFile.hpp b/packages/stk/stk_io/stk_io/InputFile.hpp index fe0e5c3ee239..a8c7d37e3e0f 100644 --- a/packages/stk/stk_io/stk_io/InputFile.hpp +++ b/packages/stk/stk_io/stk_io/InputFile.hpp @@ -87,10 +87,6 @@ class Part; stk::mesh::BulkData &bulk, bool useEntityListCache = false); void get_global_variable_names(std::vector &names); - void build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missing); - - void build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields); - std::shared_ptr get_input_ioss_region() { if (m_region.get() == nullptr && m_database.get() != nullptr) { @@ -138,22 +134,22 @@ class Part; return true; } + const std::vector& get_multistate_suffixes() const + { + static std::vector emptyVector; + + if(nullptr != m_multiStateSuffixes) { + return *m_multiStateSuffixes; + } + + return emptyVector; + } + + DatabasePurpose get_database_purpose() const { return m_db_purpose; } + + void initialize_input_fields(); + private: - bool process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr = nullptr); - - bool build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields = nullptr); - - void build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf); DatabasePurpose m_db_purpose; std::shared_ptr m_database; diff --git a/packages/stk/stk_io/stk_io/InputQuery.cpp b/packages/stk/stk_io/stk_io/InputQuery.cpp new file mode 100644 index 000000000000..6d865a2024bf --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.cpp @@ -0,0 +1,514 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +// ####################### Start Clang Header Tool Managed Headers ######################## +// clang-format off +#include +#include // for exception +#include // for copy, sort, max, find +#include // for fmod +#include // for size_t +#include // for operator<<, basic_ostream +#include // for numeric_limits +#include // for runtime_error +#include // for READ_RESTART, Database... +#include // for DBStepTimeInterval +#include +#include // for is_part_io_part, all_f... +#include // for MeshField, MeshField::... +#include +#include // for FieldBase, FieldBase::... +#include // for find_restriction +#include // for MetaData +#include // for filename_substitution +#include "stk_util/environment/RuntimeWarning.hpp" // for RuntimeWarningAdHoc +#include // for ThrowErrorMsgIf, Throw... +#include // for move, pair +#include "Ioss_DBUsage.h" // for DatabaseUsage, READ_MODEL +#include "Ioss_DatabaseIO.h" // for DatabaseIO +#include "Ioss_EntityType.h" // for SIDESET, EntityType +#include "Ioss_Field.h" // for Field, Field::TRANSIENT +#include "Ioss_GroupingEntity.h" // for GroupingEntity +#include "Ioss_IOFactory.h" // for IOFactory +#include "Ioss_MeshType.h" // for MeshType, MeshType::UN... +#include "Ioss_NodeBlock.h" // for NodeBlock +#include "Ioss_NodeSet.h" // for NodeSet +#include "Ioss_Property.h" // for Property +#include "Ioss_Region.h" // for Region, NodeBlockConta... +#include "Ioss_SideBlock.h" // for SideBlock +#include "Ioss_SideSet.h" // for SideSet +#include "StkIoUtils.hpp" // for part_primary_entity_rank +#include "stk_mesh/base/BulkData.hpp" // for BulkData +#include "stk_mesh/base/FieldState.hpp" // for FieldState +#include "stk_mesh/base/Part.hpp" // for Part +#include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank +#include "stk_topology/topology.hpp" // for topology, topology::NO... +// clang-format on +// ####################### End Clang Header Tool Managed Headers ######################## + +namespace { +void add_missing_fields(std::vector *missingFields, + stk::io::MissingFieldMap& missingFieldsCollector) +{ + if (nullptr != missingFields) { + std::vector discoveredMissingFields; + for (auto missingStatedFieldIter : missingFieldsCollector) + { + discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, + missingStatedFieldIter.second->db_name())); + } + std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), + [](const stk::io::MeshField &a, const stk::io::MeshField &b) { + return (a.db_name() < b.db_name()) + || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); + + for(stk::io::MeshField &missingField : *missingFields) { + std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); + if(iter != discoveredMissingFields.end()) { + discoveredMissingFields.erase(iter); + } + } + + missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); + } +} +} + +namespace stk { +namespace io { + + InputQuery::InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes) + : m_region(region), + m_meta(meta), + m_dbPurpose(dbPurpose), + m_multiStateSuffixes(multiStateSuffixes) + { + } + + bool InputQuery::build_field_part_associations(stk::io::MeshField &meshField, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollector) + { + bool fieldIsMissing = false; + stk::mesh::FieldBase *f = meshField.field(); + // Only add TRANSIENT Fields -- check role; if not present assume transient... + const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); + if (role == nullptr || *role == Ioss::Field::TRANSIENT) { + if (stk::io::is_field_on_part(f, rank, part)) { + const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); + FieldType fieldType; + stk::io::get_io_field_type(f, res, &fieldType); + + if (fieldType.type != Ioss::Field::INVALID) { + const std::string &dbName = meshField.db_name(); + unsigned numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + fieldIsMissing = true; + if (missingFieldsCollector) { + for (stk::mesh::FieldState missingState : missingStates) + (*missingFieldsCollector)[f->field_state(missingState)] = &meshField; + } + } + } + + bool fieldExists = ioEntity->field_exists(dbName); + if (!fieldExists) { + fieldIsMissing = true; + if (missingFieldsCollector) { + (*missingFieldsCollector)[f] = &meshField; + } + } + + // See if field with that name exists on ioEntity... + if (fieldExists) { + meshField.add_part(rank, part, ioEntity); + meshField.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + meshField.set_active(); + } + } + } + } + return fieldIsMissing; + } + + int InputQuery::build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + MissingFieldMap missingFieldsCollector; + MissingFieldMap *missingFieldsCollectorPtr = (missingFields ? &missingFieldsCollector : nullptr); + + // Each input field will have a list of the Parts that the field exists on... + // Create this list. + int numMissingFields = 0; + // First handle any fields that are sub-setted (restricted to a specified list of parts) + + const stk::mesh::FieldBase *f = mf.field(); + + for (const stk::mesh::Part *part : mf.m_subsetParts) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + bool fieldIsMissing = false; + + if (f->entity_rank() == rank) { + Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + STK_ThrowErrorMsgIf( ioEntity == nullptr, + "ERROR: For field '" << + mf.field()->name() << + "' Could not find database entity corresponding to the part named '" << + part->name() << "'."); + fieldIsMissing |= build_field_part_associations(mf, *part, rank, ioEntity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + + + // Now handle the non-subsetted fields... + + // Check universal_part() NODE_RANK first... + if (mf.m_subsetParts.empty()) { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = m_region.get_node_blocks()[0]; + bool fieldIsMissing = build_field_part_associations(mf, m_meta.universal_part(), stk::topology::NODE_RANK, + nodeEntity, missingFieldsCollectorPtr); + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + + // Now handle all non-nodeblock parts... + for ( stk::mesh::Part * const part : m_meta.get_parts()) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + if (entity != nullptr && entity->type() != Ioss::SIDESET) { + + if (mf.m_subsetParts.empty()) { + f = mf.field(); + bool fieldIsMissing = false; + if (f->entity_rank() == rank) { + fieldIsMissing |= build_field_part_associations(mf, *part, rank, entity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + add_missing_fields(missingFields, missingFieldsCollector); + + return numMissingFields; + } + + bool InputQuery::process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr) + { + STK_ThrowRequireMsg(ioEntity != nullptr, "Null IO entity"); + + bool doesFieldExist = false; + + stk::mesh::FieldBase *f = mf.field(); + + stk::mesh::EntityRank rank = part_primary_entity_rank(part); + if(f->entity_rank() == rank) { + const std::string &dbName = mf.db_name(); + unsigned numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + if (missingFieldsCollectorPtr) { + for (stk::mesh::FieldState missingState : missingStates) { + (*missingFieldsCollectorPtr)[f->field_state(missingState)] = &mf; + } + } + } else { + doesFieldExist = true; + } + } + + if(doesFieldExist == false) { + doesFieldExist = ioEntity->field_exists(dbName); + if (!doesFieldExist) { + if (missingFieldsCollectorPtr) { + (*missingFieldsCollectorPtr)[f] = &mf; + } + } + } + + // See if field with that name exists on ioEntity... + if (doesFieldExist) { + mf.add_part(f->entity_rank(), part, ioEntity); + mf.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + mf.set_active(); + } + } + + return doesFieldExist; + } + + int InputQuery::build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + int numMissingFields = 0; + + if(mf.is_active()) { + return numMissingFields; + } + + MissingFieldMap missingFieldCollector; + bool doesFieldExist = false; + stk::mesh::Part &universalPart = m_meta.universal_part(); + Ioss::GroupingEntity * universalNodeEntity = m_region.get_entity("nodeblock_1"); + + doesFieldExist |= process_fields_for_grouping_entity(mf, universalPart, universalNodeEntity, &missingFieldCollector); + + for ( stk::mesh::Part * const part : m_meta.get_parts() ) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + + if(ioEntity == nullptr) { + continue; + } + + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioEntity, &missingFieldCollector); + + if(ioEntity->type() == Ioss::SIDEBLOCK || ioEntity->type() == Ioss::SIDESET) { + static const std::string s_nodeset_suffix("_n"); + + std::string nsName = part->name(); + nsName += s_nodeset_suffix; + Ioss::NodeSet *ioNodeSet = m_region.get_nodeset(nsName); + if(ioNodeSet != nullptr) { + // Process hidden nodesets + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioNodeSet, &missingFieldCollector); + } + } + + if(ioEntity->type() == Ioss::SIDESET) { + Ioss::SideSet* sideSet = dynamic_cast(ioEntity); + auto faceBlocks = sideSet->get_side_blocks(); + for (auto faceBlock : faceBlocks) { + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, faceBlock, &missingFieldCollector); + } + } + } + } + + if (!doesFieldExist) { + numMissingFields += missingFieldCollector.size(); + if (nullptr != missingFields) { + add_missing_fields(missingFields, missingFieldCollector); + } + else { + for (auto missingField : missingFieldCollector) { + std::cout << "Missing field: " << missingField.second->db_name() << std::endl; + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + return numMissingFields; + } + + void InputQuery::build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part) + { + stk::mesh::FieldBase *f = mf.field(); + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + + if (entity != nullptr) { + if (f->entity_rank() == rank) { + build_field_part_associations(mf, *part, rank, entity); + process_fields_for_grouping_entity(mf, *part, entity); + + if(entity->type() == Ioss::SIDESET) { + auto io_side_set = dynamic_cast(entity); + STK_ThrowRequire(io_side_set != nullptr); + auto fbs = io_side_set->get_side_blocks(); + + for(auto& io_fblock : fbs) { + build_field_part_associations(mf, *part, rank, io_fblock); + process_fields_for_grouping_entity(mf, *part, io_fblock); + } + } + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodes_name = part->name() + "_nodes"; + + nodeEntity = m_region.get_entity(nodes_name); + + if (nodeEntity == nullptr) { + nodes_name = part->name() + "_n"; + nodeEntity = m_region.get_entity(nodes_name); + } + + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity); + process_fields_for_grouping_entity(mf, *part, nodeEntity); + } + } + } + } + + bool verify_field_request(const Ioss::Region& region, const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, bool printWarning) + { + stk::io::InputQuery iq(region, meta, dbPurpose, (multiStateSuffixes.empty() ? nullptr : &multiStateSuffixes)); + + stk::io::MeshField mf(meshField.field(), meshField.db_name()); + std::vector missingFields; + + iq.build_field_part_associations(mf, &missingFields, false); + iq.build_field_part_associations_from_grouping_entity(mf, &missingFields, false); + + if(missingFields.size() > 0) { + std::ostringstream oss; + oss << "For input IO field: " + << meshField.db_name() + << " the following associated fields for the requested STK field: " + << meshField.field()->name() + << " of rank: " + << meshField.field()->entity_rank() + << ", are missing in database: " + << region.get_database()->get_filename() + << std::endl; + + for(auto & missingField : missingFields) { + oss << "\t" << missingField.field()->name() << std::endl; + } + + stk::RuntimeWarning() << oss.str(); + } + + return mf.is_active(); + } + + bool verify_field_request(const StkMeshIoBroker &broker, const MeshField &meshField, bool printWarning) + { + auto region = broker.get_input_ioss_region(); + if(!region) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no input Ioss::Region" << std::endl; + } + + return false; + } + + if(broker.is_meta_data_null()) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no stk::mesh::MetaData defined" << std::endl; + } + + return false; + } + + const stk::mesh::MetaData &meta = broker.meta_data(); + InputFile& inputFile = broker.get_mesh_database(broker.get_active_mesh()); + + return verify_field_request(*region, meta, inputFile.get_database_purpose(), + inputFile.get_multistate_suffixes(), + meshField, printWarning); + } +} +} + diff --git a/packages/stk/stk_io/stk_io/InputQuery.hpp b/packages/stk/stk_io/stk_io/InputQuery.hpp new file mode 100644 index 000000000000..dfc9128068e1 --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.hpp @@ -0,0 +1,115 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_STK_IO_STK_IO_INPUTQUERY_HPP_ +#define STK_STK_IO_STK_IO_INPUTQUERY_HPP_ + +#include +#include // for DatabasePurpose +#include +#include +#include "Ioss_EntityType.h" + +namespace Ioss { +class PropertyManager; +class GroupingEntity; +class Region; +class DatabaseIO; +} + +namespace stk { +namespace mesh { +class MetaData; +class BulkData; +class Part; +} + +namespace io { +class StkMeshIoBroker; + +using MissingFieldMap = std::map; + +class InputQuery +{ +public: + InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes = nullptr); + + ~InputQuery() { } + + int build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + int build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + void build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part); + + bool process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr = nullptr); + + bool build_field_part_associations(stk::io::MeshField &mesh_field, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFields = nullptr); + +private: + const Ioss::Region& m_region; + const stk::mesh::MetaData& m_meta; + DatabasePurpose m_dbPurpose; + const std::vector* m_multiStateSuffixes = nullptr; +}; + +bool verify_field_request(const StkMeshIoBroker &broker, + const stk::io::MeshField &meshField, + bool printWarning = true); + +bool verify_field_request(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, + bool printWarning = true); + +} +} + +#endif /* STK_STK_IO_STK_IO_INPUTQUERY_HPP_ */ diff --git a/packages/stk/stk_io/stk_io/IossBridge.cpp b/packages/stk/stk_io/stk_io/IossBridge.cpp index a69dc2a801f3..0f72a3976e68 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.cpp +++ b/packages/stk/stk_io/stk_io/IossBridge.cpp @@ -1280,7 +1280,7 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta return stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; } else if (name == "shell4") { return stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; - } else if (name == "shel8") { + } else if (name == "shell8") { return stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; } else if (name == "shell9") { return stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; @@ -1837,7 +1837,8 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } } - const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, std::vector* multiStateSuffixes) + const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, + const std::vector* multiStateSuffixes) { if(nullptr != multiStateSuffixes) { STK_ThrowRequireMsg((multiStateSuffixes->size() >= fieldState), @@ -1872,26 +1873,27 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } std::string get_stated_field_name(const std::string &fieldBaseName, stk::mesh::FieldState stateIdentifier, - std::vector* multiStateSuffixes) + const std::vector* multiStateSuffixes) { std::string field_name_with_suffix = fieldBaseName + get_suffix_for_field_at_state(stateIdentifier, multiStateSuffixes); return field_name_with_suffix; } bool field_state_exists_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, stk::mesh::FieldState stateIdentifier, - Ioss::GroupingEntity *ioEntity, std::vector* multiStateSuffixes) + Ioss::GroupingEntity *ioEntity, const std::vector* multiStateSuffixes) { std::string fieldNameWithSuffix = get_stated_field_name(dbName, stateIdentifier, multiStateSuffixes); return ioEntity->field_exists(fieldNameWithSuffix); } - bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *ioEntity, - std::vector &missingStates, std::vector* inputMultiStateSuffixes) + bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *ioEntity, std::vector &missingStates, + const std::vector* inputMultiStateSuffixes) { bool allStatesExist = true; size_t stateCount = field->number_of_states(); - std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; + const std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; if(nullptr != multiStateSuffixes) { STK_ThrowRequire(multiStateSuffixes->size() >= stateCount); @@ -4415,6 +4417,5 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta filter_nodes_by_local_connectivity(bulk, params.get_subset_selector(), nodes); } - }//namespace io }//namespace stk diff --git a/packages/stk/stk_io/stk_io/IossBridge.hpp b/packages/stk/stk_io/stk_io/IossBridge.hpp index 0198587b9f04..9a4fa424c936 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.hpp +++ b/packages/stk/stk_io/stk_io/IossBridge.hpp @@ -372,13 +372,14 @@ void delete_selector_property(Ioss::Region &io_region); void delete_selector_property(Ioss::GroupingEntity *io_entity); std::string get_stated_field_name(const std::string &field_base_name, stk::mesh::FieldState state_identifier, - std::vector* multiStateSuffixes=nullptr); + const std::vector* multiStateSuffixes=nullptr); bool field_state_exists_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, stk::mesh::FieldState state_identifier, - Ioss::GroupingEntity *io_entity, std::vector* multiStateSuffixes=nullptr); + Ioss::GroupingEntity *io_entity, const std::vector* multiStateSuffixes=nullptr); -bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *io_entity, - std::vector &missing_states, std::vector* multiStateSuffixes=nullptr); +bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *io_entity, std::vector &missing_states, + const std::vector* multiStateSuffixes=nullptr); void multistate_field_data_from_ioss(const stk::mesh::BulkData& mesh, const stk::mesh::FieldBase *field, diff --git a/packages/stk/stk_io/stk_io/MeshField.cpp b/packages/stk/stk_io/stk_io/MeshField.cpp index 77d92f5d39cc..bb4e1dd75a76 100644 --- a/packages/stk/stk_io/stk_io/MeshField.cpp +++ b/packages/stk/stk_io/stk_io/MeshField.cpp @@ -302,6 +302,11 @@ double MeshField::restore_field_data(stk::mesh::BulkData &bulk, return time_read; } +void MeshField::clear_field_parts() +{ + m_fieldParts.clear(); +} + void MeshFieldPart::release_field_data() { m_preStep = 0; diff --git a/packages/stk/stk_io/stk_io/MeshField.hpp b/packages/stk/stk_io/stk_io/MeshField.hpp index 10f320e17f30..5f38142c4c75 100644 --- a/packages/stk/stk_io/stk_io/MeshField.hpp +++ b/packages/stk/stk_io/stk_io/MeshField.hpp @@ -51,6 +51,7 @@ namespace stk { namespace mesh { class Part; } } // clang-format on // ####################### End Clang Header Tool Managed Headers ######################## namespace stk { namespace io { class InputFile; } } +namespace stk { namespace io { class InputQuery; } } namespace stk { namespace io { @@ -91,6 +92,7 @@ class MeshField public: friend class InputFile; + friend class InputQuery; // Options: // * Frequency: @@ -165,6 +167,8 @@ class MeshField bool field_restored() const {return m_fieldRestored;} double time_restored() const {return m_timeRestored;} + void clear_field_parts(); + private: MeshField(); diff --git a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp index f50450bca6e2..6577523e3730 100644 --- a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp +++ b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp @@ -112,7 +112,6 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) STKIORequire(ss_part != nullptr); stk::mesh::FieldBase *distribution_factors_field = nullptr; - bool surface_df_defined = false; // Has the surface df field been defined yet? size_t block_count = sset->block_count(); for (size_t i=0; i < block_count; i++) { @@ -125,13 +124,14 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) } if (sb->field_exists("distribution_factors")) { - if (!surface_df_defined) { - stk::topology::rank_t side_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); - std::string field_name = sset->name() + "_df"; - distribution_factors_field = &meta.declare_field(side_rank, field_name); + stk::topology::rank_t side_block_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); + std::string field_name = sset->name() + "_df"; + distribution_factors_field = meta.get_field(side_block_rank, field_name); + + if (distribution_factors_field == nullptr) { + distribution_factors_field = &meta.declare_field(side_block_rank, field_name); stk::io::set_field_role(*distribution_factors_field, Ioss::Field::MESH); stk::io::set_distribution_factor_field(*ss_part, *distribution_factors_field); - surface_df_defined = true; } stk::io::set_distribution_factor_field(*sb_part, *distribution_factors_field); int side_node_count = sb->topology()->number_nodes(); diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp index fb9f5e4438eb..76e427b1a2b6 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp @@ -372,7 +372,7 @@ std::shared_ptr StkMeshIoBroker::get_input_ioss_region() const } } -InputFile &StkMeshIoBroker::get_mesh_database(size_t input_file_index) +InputFile &StkMeshIoBroker::get_mesh_database(size_t input_file_index) const { validate_input_file_index(input_file_index); return *m_inputFiles[input_file_index]; @@ -1275,6 +1275,12 @@ bool StkMeshIoBroker::get_throw_on_missing_input_fields() const void StkMeshIoBroker::set_enable_all_face_sides_shell_topo(bool flag) { m_enableAllFaceSidesShellTopo = flag; + if (m_inputFiles.size() > m_activeMeshIndex) { + Ioss::Region *region = m_inputFiles[m_activeMeshIndex]->get_input_ioss_region().get(); + if (nullptr != region) { + region->property_add(Ioss::Property("ENABLE_ALL_FACE_SIDES_SHELL", "YES")); + } + } } bool StkMeshIoBroker::get_enable_all_face_sides_shell_topo() const diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp index f58f08df3a40..f7164f11ad61 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp @@ -287,8 +287,8 @@ namespace stk { // Get a reference to an existing mesh database so it can be modified // Typical modifications deal with - // times: tart/stop/offset/scale/cycle/periodlength. - InputFile &get_mesh_database(size_t input_file_index); + // times: start/stop/offset/scale/cycle/periodlength. + InputFile &get_mesh_database(size_t input_file_index) const; // Remove the specified mesh database from the list of mesh databases. // All files associated with the mesh database will be closed and destructors diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp index 4eb2e69024ce..a10eb53b61b7 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp @@ -245,7 +245,8 @@ unsigned get_default_bucket_capacity() { return impl::default_maximum_bucket_cap unsigned get_default_initial_bucket_capacity() { return impl::default_initial_bucket_capacity; } unsigned get_default_maximum_bucket_capacity() { return impl::default_maximum_bucket_capacity; } -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) { bool result = true ; { @@ -259,21 +260,19 @@ bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) } inline -bool bucket_key_less( const unsigned * lhs , const unsigned * rhs ) +bool bucket_key_less( const OrdinalVector& lhs , const OrdinalVector& rhs ) { - const unsigned * const last_lhs = lhs + ( *lhs < *rhs ? *lhs : *rhs ); - while ( last_lhs != lhs && *lhs == *rhs ) { ++lhs ; ++rhs ; } - return *lhs < *rhs ; + return lhs < rhs; } -// The part count and part ordinals are less bool BucketLess::operator()( const Bucket * lhs_bucket , - const unsigned * rhs ) const -{ return bucket_key_less( lhs_bucket->key() , rhs ); } + const OrdinalVector& rhs ) const +{ return bucket_key_less( lhs_bucket->key_vector() , rhs ); } -bool BucketLess::operator()( const unsigned * lhs , +bool BucketLess::operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const -{ return bucket_key_less( lhs , rhs_bucket->key() ); } +{ return bucket_key_less( lhs , rhs_bucket->key_vector() ); } +#endif //---------------------------------------------------------------------- @@ -287,7 +286,7 @@ Bucket::Bucket(BulkData & mesh, m_entity_rank(entityRank), m_topology(), m_key(key), - m_partOrdsBeginEnd(m_key.data()+1,m_key.data()+1+m_key[0]), + m_partOrdsBeginEnd(m_key.data(),m_key.data()+m_key.size()), m_capacity(initialCapacity), m_maxCapacity(maximumCapacity), m_size(0), @@ -323,7 +322,7 @@ Bucket::Bucket(BulkData & mesh, setup_connectivity(m_topology, entityRank, stk::topology::FACE_RANK, m_face_kind, m_fixed_face_connectivity); setup_connectivity(m_topology, entityRank, stk::topology::ELEMENT_RANK, m_element_kind, m_fixed_element_connectivity); - m_parts.reserve(m_key.size()-1); + m_parts.reserve(m_key.size()); supersets(m_parts); m_mesh.new_bucket_callback(m_entity_rank, m_parts, m_capacity, this); @@ -654,20 +653,13 @@ unsigned Bucket::get_ngp_field_bucket_is_modified(unsigned fieldOrdinal) const void Bucket::reset_part_ord_begin_end() { - m_partOrdsBeginEnd.first = m_key.data()+1; - m_partOrdsBeginEnd.second = m_key.data()+1+m_key[0]; + m_partOrdsBeginEnd.first = m_key.data(); + m_partOrdsBeginEnd.second = m_key.data()+m_key.size(); } void Bucket::reset_bucket_key(const OrdinalVector& newPartOrdinals) { - unsigned newPartCount = newPartOrdinals.size(); - - m_key.resize(newPartCount + 1); - m_key[0] = newPartCount; - - for(unsigned i = 0; i < newPartCount; i++) { - m_key[i+1] = newPartOrdinals[i]; - } + m_key = newPartOrdinals; } void Bucket::reset_bucket_parts(const OrdinalVector& newPartOrdinals) @@ -748,6 +740,10 @@ bool Bucket::destroy_relation(Entity e_from, Entity e_to, const RelationIdentifi DestroyRelationFunctor functor(from_bucket_ordinal, e_to, static_cast(local_id)); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } @@ -756,6 +752,10 @@ bool Bucket::declare_relation(unsigned bucket_ordinal, Entity e_to, const Connec DeclareRelationFunctor functor(bucket_ordinal, e_to, ordinal, permutation); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp index 423d3c07eefc..7dfa4941326a 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp @@ -91,8 +91,10 @@ std::ostream & operator << ( std::ostream & , const Bucket & ); std::ostream & print( std::ostream & , const std::string & indent , const Bucket & ); +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 // The part count and parts are equal -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +#endif #define CONNECTIVITY_TYPE_SWITCH(entity_kind, fixed_func_sig, dynamic_func_sig, check_invalid) \ switch(entity_kind) { \ @@ -220,9 +222,9 @@ class Bucket std::pair superset_part_ordinals() const { return m_partOrdsBeginEnd; } -#ifndef DOXYGEN_COMPILE + const std::vector & key_vector() const { return m_key; } + const unsigned * key() const { return m_key.data() ; } -#endif /* DOXYGEN_COMPILE */ /** \brief The allocation size, in bytes, of this bucket */ unsigned allocation_size() const { return 0 ; } @@ -460,8 +462,6 @@ class Bucket unsigned maximumCapacity, unsigned bucketId); - const std::vector & key_vector() const { return m_key; } - // Add a new entity to end of bucket void add_entity(Entity entity = Entity()); @@ -598,16 +598,16 @@ bool has_superset( const Bucket & bucket , const Part & p ) */ bool has_superset( const Bucket & bucket , const PartVector & parts ); - -struct BucketLess { - bool operator()( const Bucket * lhs_bucket , const unsigned * rhs ) const ; - bool operator()( const unsigned * lhs , const Bucket * rhs_bucket ) const ; +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1 2025 +struct STK_DEPRECATED BucketLess { + bool operator()( const Bucket * lhs_bucket , const OrdinalVector& rhs ) const ; + bool operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const ; }; -inline -BucketVector::iterator -lower_bound( BucketVector & v , const unsigned * key ) +STK_DEPRECATED inline BucketVector::iterator +lower_bound( BucketVector & v , const OrdinalVector& key ) { return std::lower_bound( v.begin() , v.end() , key , BucketLess() ); } +#endif struct BucketIdComparator { diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp index 5a1655330001..1b81c06f73b8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp @@ -859,12 +859,13 @@ Entity BulkData::declare_element_side_with_id(const stk::mesh::EntityId globalSi } } else { - EntityKey sideKey(mesh_meta_data().side_rank(), globalSideId); + stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); + EntityKey sideKey(sideTop.rank(), globalSideId); + std::pair result = internal_get_or_create_entity_with_notification(sideKey); side = result.first; const bool newlyCreated = result.second; - stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); if (newlyCreated) { PARTVECTOR allParts = add_root_topology_part(parts, mesh_meta_data().get_topology_root_part(sideTop)); allParts.push_back(&mesh_meta_data().locally_owned_part()); @@ -4118,12 +4119,6 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) m_meshModification.get_deleted_entity_cache().update_deleted_entities_container(); - for (FieldBase * stkField : mesh_meta_data().get_fields()) { - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); - } - } - for(SelectorBucketMap& selectorBucketMap : m_selector_to_buckets_maps) { for (SelectorBucketMap::iterator itr = selectorBucketMap.begin(), end = selectorBucketMap.end(); itr != end; ++itr) { if (itr->second.empty()) { @@ -4136,6 +4131,14 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) } notify_finished_mod_end(); + + if (mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : mesh_meta_data().get_fields()) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); + } + } + } } bool BulkData::internal_modification_end_for_skin_mesh( EntityRank entity_rank, ModEndOptimizationFlag opt, const stk::mesh::Selector& selectedToSkin, @@ -4807,8 +4810,8 @@ void BulkData::internal_change_bucket_parts_without_propagating_to_downward_conn bucket->reset_bucket_parts(newBucketPartList); originalPartition->reset_partition_key(bucket->key_vector()); } else { - if(impl::partition_key_less(originalPartition->key(), partition->key()) || - impl::partition_key_less(partition->key(), originalPartition->key()) ) { + if(originalPartition->get_legacy_partition_id() < partition->get_legacy_partition_id() || + partition->get_legacy_partition_id() < originalPartition->get_legacy_partition_id()) { originalPartition->remove_bucket(bucket); bucket->reset_bucket_parts(newBucketPartList); @@ -5573,7 +5576,14 @@ void BulkData::de_induce_parts_from_nodes(const stk::mesh::EntityVector & deacti unsigned BulkData::num_sides(Entity entity) const { + if (bucket(entity).topology().has_mixed_rank_sides()) { + auto num_connected_edges = num_connectivity(entity, stk::topology::EDGE_RANK); + auto num_connected_faces = num_connectivity(entity, stk::topology::FACE_RANK); + + return num_connected_edges + num_connected_faces; + } else { return num_connectivity(entity, mesh_meta_data().side_rank()); + } } void BulkData::sort_entities(const stk::mesh::EntitySorterBase& sorter) diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp index d5f491b8cff8..c752f7255b32 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp @@ -1464,7 +1464,7 @@ class BulkData { friend class ::stk::io::StkMeshIoBroker; template friend class stk::mesh::DeviceMeshT; friend class stk::mesh::StkFieldSyncDebugger; - template class NgpDebugger> friend class stk::mesh::DeviceField; + template class NgpDebugger> friend class stk::mesh::DeviceField; // friends until it is decided what we're doing with Fields and Parallel and BulkData friend void communicate_field_data(const Ghosting & ghosts, const std::vector & fields); diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp index 0756cb18a676..f02b07a3c54e 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp @@ -59,7 +59,7 @@ constexpr unsigned NUM_COMPONENTS_INDEX = 0; constexpr unsigned FIRST_DIMENSION_INDEX = 1; constexpr unsigned INVALID_ORDINAL = 9999999; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { constexpr double OVERALLOCATION_FACTOR = 1.1; @@ -69,19 +69,18 @@ namespace impl { return std::lround(size_requested*OVERALLOCATION_FACTOR); } - template const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField); - template FieldDataDeviceViewType get_device_data(DeviceField&); + template const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField); + template FieldDataDeviceViewType get_device_data(DeviceField&); } -template class NgpDebugger> +template class NgpDebugger> class DeviceField : public NgpFieldBase { -private: - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; - public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; KOKKOS_FUNCTION DeviceField() @@ -114,19 +113,19 @@ class DeviceField : public NgpFieldBase initialize(); } - KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; KOKKOS_FUNCTION ~DeviceField() {} - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; void initialize() { hostField->template make_field_sync_debugger(); - fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); + fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); } - void set_field_states(DeviceField* /*fields*/[]) + void set_field_states(DeviceField* /*fields*/[]) { } @@ -359,13 +358,15 @@ class DeviceField : public NgpFieldBase void swap_field_views(NgpFieldBase *other) override { - DeviceField* deviceFieldT = dynamic_cast*>(other); + DeviceField* deviceFieldT = dynamic_cast*>(other); STK_ThrowRequireMsg(deviceFieldT != nullptr, "DeviceField::swap_field_views called with class that can't dynamic_cast to DeviceField"); swap_views(deviceData, deviceFieldT->deviceData); + swap_views(hostBucketPtrData, deviceFieldT->hostBucketPtrData); + swap_views(deviceBucketPtrData, deviceFieldT->deviceBucketPtrData); } KOKKOS_FUNCTION - void swap(DeviceField &other) + void swap(DeviceField &other) { swap_views(deviceData, other.deviceData); } @@ -405,10 +406,15 @@ class DeviceField : public NgpFieldBase private: ExecSpace& get_execution_space() const { return hostField->get_execution_space(); } - void set_execution_space(const ExecSpace& executionSpace) { hostField->set_execution_space(executionSpace); } + void set_execution_space(const ExecSpace& executionSpace) + { + static_assert(Kokkos::SpaceAccessibility::accessible); + hostField->set_execution_space(executionSpace); + } void set_execution_space(ExecSpace&& executionSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); hostField->set_execution_space(std::forward(executionSpace)); } @@ -468,8 +474,9 @@ class DeviceField : public NgpFieldBase void construct_view(const BucketVector& buckets, const std::string& name, unsigned numPerEntity) { unsigned numBuckets = buckets.size(); - FieldDataDeviceViewType tempDataDeviceView = FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), numBuckets, - ORDER_INDICES(bucketCapacity, numPerEntity)); + FieldDataDeviceViewType tempDataDeviceView = + FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), + numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); fieldSyncDebugger.initialize_view(tempDataDeviceView); copy_unmodified_buckets(buckets, tempDataDeviceView, numPerEntity); @@ -609,7 +616,7 @@ class DeviceField : public NgpFieldBase Kokkos::deep_copy(get_execution_space(), deviceBucketPtrData, hostBucketPtrData); } - void copy_unmodified_buckets(const BucketVector& buckets, FieldDataDeviceViewType destDevView, unsigned numPerEntity) + void copy_unmodified_buckets(const BucketVector& buckets, FieldDataDeviceViewType destDevView, unsigned numPerEntity) { for(unsigned i = 0; i < buckets.size(); i++) { unsigned oldBucketId = buckets[i]->get_ngp_field_bucket_id(get_ordinal()); @@ -617,7 +624,7 @@ class DeviceField : public NgpFieldBase if(!buckets[i]->get_ngp_field_bucket_is_modified(get_ordinal())) { STK_ThrowRequire(deviceData.extent(0) != 0 && deviceSelectedBucketOffset.extent(0) != 0); - copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); } } } @@ -682,19 +689,19 @@ class DeviceField : public NgpFieldBase void shift_bucket_forward(unsigned oldBucketId, unsigned newBucketId, unsigned numPerEntity) { - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - oldBucketId, newBucketId, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + oldBucketId, newBucketId, + numPerEntity); } void shift_buckets_backward(const std::vector & backwardShiftList, unsigned numPerEntity) { for (auto it = backwardShiftList.rbegin(); it != backwardShiftList.rend(); ++it) { const BackwardShiftIndices& shiftIndices = *it; - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - shiftIndices.oldIndex, - shiftIndices.newIndex, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + shiftIndices.oldIndex, + shiftIndices.newIndex, + numPerEntity); } } @@ -784,11 +791,11 @@ class DeviceField : public NgpFieldBase host = Kokkos::create_mirror_view(view); } - friend NgpDebugger; - friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); - friend FieldDataDeviceViewType impl::get_device_data(DeviceField&); + friend NgpDebugger; + friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); + friend FieldDataDeviceViewType impl::get_device_data(DeviceField&); - FieldDataDeviceViewType deviceData; + FieldDataDeviceViewType deviceData; FieldDataPointerHostViewType hostBucketPtrData; FieldDataPointerDeviceViewType deviceBucketPtrData; @@ -814,20 +821,20 @@ class DeviceField : public NgpFieldBase UnsignedViewType deviceFieldBucketsNumComponentsPerEntity; UnsignedViewType deviceFieldBucketsMarkedModified; - NgpDebugger fieldSyncDebugger; + NgpDebugger fieldSyncDebugger; }; namespace impl { //not for public consumption. calling this will void your warranty. -template -const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) +template +const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) { return deviceField.deviceData; } -template -FieldDataDeviceViewType get_device_data(DeviceField& deviceField) +template +FieldDataDeviceViewType get_device_data(DeviceField& deviceField) { return deviceField.deviceData; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp index 27faedc82158..ece362a2e4f9 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp @@ -97,6 +97,9 @@ struct DeviceBucketT { KOKKOS_INLINE_FUNCTION ConnectedOrdinals get_connected_ordinals(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_INLINE_FUNCTION + Permutations get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_FUNCTION ConnectedNodes get_nodes(unsigned offsetIntoBucket) const { return get_connected_entities(offsetIntoBucket, stk::topology::NODE_RANK); @@ -136,6 +139,7 @@ struct DeviceBucketT { void initialize_bucket_attributes(const stk::mesh::Bucket &bucket); void initialize_fixed_data_from_host(const stk::mesh::Bucket &bucket); void update_entity_data_from_host(const stk::mesh::Bucket &bucket); + void update_sparse_connectivity_from_host(const stk::mesh::Bucket &bucket); void resize_device_views(const stk::mesh::Bucket &bucket); std::pair scan_entities_for_nodal_connectivity(const stk::mesh::Bucket & bucket); @@ -146,6 +150,11 @@ struct DeviceBucketT { OrdinalViewType m_nodeOrdinals; + Unsigned2dViewType m_sparseConnectivityOffsets; + BucketConnectivityType m_sparseConnectivity; + OrdinalViewType m_sparseConnectivityOrdinals; + PermutationViewType m_sparseConnectivityPermutations; + PartOrdinalViewType m_partOrdinals; const stk::mesh::DeviceMeshT* m_owningMesh; @@ -244,69 +253,39 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) - { - return buckets[rank](entity.bucket_id).get_connected_entities(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedEntities connectedEntities(nullptr, 0); - if (numConnected > 0) { - int stride = 1; - connectedEntities = - ConnectedEntities(&(sparseConnectivity[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedEntities; + return buckets[rank](entityIndex.bucket_id).get_connected_entities(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION - ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return buckets[rank](entity.bucket_id).get_nodes(entity.bucket_ord); + return buckets[rank](entityIndex.bucket_id).get_nodes(entityIndex.bucket_ord); } KOKKOS_FUNCTION - ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::EDGE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::EDGE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::FACE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::FACE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::ELEM_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::ELEM_RANK); } KOKKOS_FUNCTION - ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) { - return buckets[rank](entity.bucket_id).get_connected_ordinals(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedOrdinals connectedOrdinals(nullptr, 0); - if (numConnected > 0) - { - int stride = 1; - connectedOrdinals = ConnectedOrdinals( - &(sparseConnectivityOrdinals[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedOrdinals; + return buckets[rank](entityIndex.bucket_id).get_connected_ordinals(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -334,24 +313,9 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - Permutations permutations(nullptr, 0); - if (connectedRank == stk::topology::NODE_RANK) - { - return permutations; - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - if (numConnected > 0) - { - int stride = 1; - permutations = Permutations(&(sparsePermutations[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return permutations; + return buckets[rank](entityIndex.bucket_id).get_connected_permutations(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -522,12 +486,29 @@ class DeviceMeshT : public NgpMeshBase return m_needSyncToHost; } -private: - void set_entity_keys(const stk::mesh::BulkData& bulk_in); + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + using EntitiesMemorySpace = typename std::remove_reference::type::memory_space; + using AddPartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + using RemovePartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'entities' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'addPartOrdinals' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'removePartOrdinals' View is inaccessible from the DeviceMesh execution space"); - void set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in); + using HostEntitiesType = typename std::remove_reference::type::HostMirror; + using HostAddPartOrdinalsType = typename std::remove_reference::type::HostMirror; + using HostRemovePartOrdinalsType = typename std::remove_reference::type::HostMirror; + } - void fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in); +private: + void set_entity_keys(const stk::mesh::BulkData& bulk_in); KOKKOS_FUNCTION bool is_last_bucket_reference(unsigned rank = stk::topology::NODE_RANK) const @@ -557,8 +538,6 @@ class DeviceMeshT : public NgpMeshBase void copy_mesh_indices_to_device(); - void copy_bucket_entity_offsets_to_device(); - void copy_sparse_connectivities_to_device(); void copy_volatile_fast_shared_comm_map_to_device(); @@ -578,11 +557,6 @@ class DeviceMeshT : public NgpMeshBase HostMeshIndexType hostMeshIndices; MeshIndexType deviceMeshIndices; - BucketEntityOffsetsViewType bucketEntityOffsets[stk::topology::NUM_RANKS]; - UnsignedViewType entityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - EntityViewType sparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - OrdinalViewType sparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - PermutationViewType sparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; UnsignedViewType volatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; FastSharedCommMapViewType volatileFastSharedCommMap[stk::topology::NUM_RANKS]; }; @@ -599,9 +573,10 @@ DeviceBucketT::get_connected_entities(unsigned offsetIntoBuck const size_t nodeOffset = m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedEntities(&m_nodeConnectivity(nodeOffset), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_entities(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedEntities(&m_sparseConnectivity(offset), length, 1); } template @@ -613,10 +588,26 @@ DeviceBucketT::get_connected_ordinals(unsigned offsetIntoBuck const unsigned numNodes = m_nodeConnectivityOffsets(offsetIntoBucket+1)-m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedOrdinals(m_nodeOrdinals.data(), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_ordinals(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedOrdinals(&m_sparseConnectivityOrdinals(offset), length, 1); +} + +template +KOKKOS_INLINE_FUNCTION +typename DeviceBucketT::Permutations +DeviceBucketT::get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const { + STK_NGP_ThrowAssert(connectedRank < stk::topology::NUM_RANKS); + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + if (m_sparseConnectivityPermutations.size() <= offset) { + return Permutations(nullptr, 0); + } + + return Permutations(&m_sparseConnectivityPermutations(offset), length, 1); } + template void DeviceBucketT::initialize_bucket_attributes(const stk::mesh::Bucket &bucket) { @@ -660,33 +651,37 @@ void DeviceBucketT::resize_device_views(const stk::mesh::Buck { Kokkos::Profiling::pushRegion("resize_device_views()"); + Kokkos::Profiling::pushRegion("set node ordinals"); + const auto [maxNodesPerEntity, totalNumConnectedNodes] = scan_entities_for_nodal_connectivity(bucket); if (m_nodeOrdinals.size() != maxNodesPerEntity) { - m_nodeOrdinals = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "NodeOrdinals"), - static_cast(maxNodesPerEntity)); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeOrdinals, static_cast(maxNodesPerEntity)); OrdinalViewType& nodeOrds = m_nodeOrdinals; //local var to avoid implicit this capture Kokkos::parallel_for(Kokkos::RangePolicy(0, maxNodesPerEntity), KOKKOS_LAMBDA(const int i) { nodeOrds(i) = static_cast(i); }); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("bucket entities"); if (m_entities.size() != m_bucketCapacity) { - m_entities = EntityViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "BucketEntities"), m_bucketCapacity); + Kokkos::resize(Kokkos::WithoutInitializing, m_entities, m_bucketCapacity); STK_ThrowRequireMsg(m_bucketCapacity > 0, "bucket capacity must be greater than 0"); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("nodal connectivity"); if (m_nodeConnectivity.size() != totalNumConnectedNodes) { - m_nodeConnectivity = BucketConnectivityType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivity"), totalNumConnectedNodes); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivity, totalNumConnectedNodes); } if (m_nodeConnectivityOffsets.size() != m_bucketCapacity+1) { - m_nodeConnectivityOffsets = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivityOffsets"), m_bucketCapacity+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivityOffsets, m_bucketCapacity+1); } Kokkos::Profiling::popRegion(); + Kokkos::Profiling::popRegion(); } template @@ -725,6 +720,87 @@ void DeviceBucketT::update_entity_data_from_host(const stk::m Kokkos::Profiling::popRegion(); } +constexpr double RESIZE_FACTOR = 0.05; + +template +inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) +{ + const size_t currentSize = deviceView.extent(0); + const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); + const bool needGrowth = (requiredSize > currentSize); + const bool needShrink = (requiredSize < shrinkThreshold); + + if (needGrowth || needShrink) { + const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); + deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); + hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); + } +} + +template +void DeviceBucketT::update_sparse_connectivity_from_host(const stk::mesh::Bucket &bucket) +{ + Kokkos::Profiling::pushRegion("update_sparse_connectivity_from_host()"); + + Unsigned2dViewType::HostMirror hostConnectivityOffsets("hostConnectivityOffsets", 0,0); + Kokkos::resize(Kokkos::WithoutInitializing, hostConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_sparseConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + BucketConnectivityType::HostMirror hostConnectivity("hostConnectivity", 0); + OrdinalViewType::HostMirror hostConnectivityOrdinals("hostConnectivityOrdinals", 0); + PermutationViewType::HostMirror hostConnectivityPermutations("hostConnectivityPermutations", 0); + + const stk::mesh::EntityRank endRank = static_cast(bucket.mesh().mesh_meta_data().entity_rank_count()); + + unsigned offset = 0; + for(stk::mesh::EntityRank connectedRank=stk::topology::EDGE_RANK; connectedRank void DeviceMeshT::update_mesh() { @@ -738,15 +814,23 @@ void DeviceMeshT::update_mesh() const bool anyChanges = fill_buckets(*bulk); if (anyChanges) { + Kokkos::Profiling::pushRegion("anyChanges stuff"); + + Kokkos::Profiling::pushRegion("entity-keys"); set_entity_keys(*bulk); copy_entity_keys_to_device(); - set_bucket_entity_offsets(*bulk); - copy_bucket_entity_offsets_to_device(); - fill_sparse_connectivities(*bulk); - copy_sparse_connectivities_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("volatile-fast-shared-comm-map"); copy_volatile_fast_shared_comm_map_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("mesh-indices"); fill_mesh_indices(*bulk); copy_mesh_indices_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::popRegion(); } synchronizedCount = bulk->synchronized_count(); @@ -780,6 +864,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) bucketBuffer[iBucket].initialize_bucket_attributes(stkBucket); bucketBuffer[iBucket].initialize_fixed_data_from_host(stkBucket); bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; Kokkos::Profiling::popRegion(); } @@ -789,6 +874,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) new (&bucketBuffer[iBucket]) DeviceBucketT(buckets[rank][ngpBucketId]); if (stkBucket.is_modified()) { bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; } bucketBuffer[iBucket].m_bucketId = stkBucket.bucket_id(); @@ -811,23 +897,6 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) return anyBucketChanges; } -constexpr double RESIZE_FACTOR = 0.05; - -template -inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) -{ - const size_t currentSize = deviceView.extent(0); - const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); - const bool needGrowth = (requiredSize > currentSize); - const bool needShrink = (requiredSize < shrinkThreshold); - - if (needGrowth || needShrink) { - const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); - deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); - hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); - } -} - template void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_in) { @@ -848,119 +917,6 @@ void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_i } } -template -void DeviceMeshT::set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in) -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; ranksize(); - } - for (unsigned i = stkBuckets.size(); i < hostBucketEntityOffsets[rank].extent(0); ++i) { - hostBucketEntityOffsets[rank](i) = bucketOffsetIntoEntities; - } - } -} - -template -void DeviceMeshT::fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in) -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - unsigned totalNumConnectedEntities[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - unsigned totalNumPermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank 0) { - - const stk::mesh::Entity* connectedEntities = stkBucket.begin(iEntity, connectedRank); - const stk::mesh::ConnectivityOrdinal* connectedOrdinals = stkBucket.begin_ordinals(iEntity, connectedRank); - const stk::mesh::Permutation* permutations = hasPermutation ? stkBucket.begin_permutations(iEntity, connectedRank) : nullptr; - for(unsigned i=0; i void DeviceMeshT::fill_mesh_indices(const stk::mesh::BulkData& bulk_in) { @@ -999,37 +955,6 @@ void DeviceMeshT::copy_mesh_indices_to_device() deviceMeshIndices = nonconst_device_mesh_indices; } -template -void DeviceMeshT::copy_bucket_entity_offsets_to_device() -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank -void DeviceMeshT::copy_sparse_connectivities_to_device() -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank void DeviceMeshT::copy_volatile_fast_shared_comm_map_to_device() { diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp index ca5a1b61ba11..16dafae9617b 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp @@ -71,7 +71,7 @@ void verify_declare_element_edge( ? elem_top.edge_topology(local_edge_id) : invalid; STK_ThrowErrorMsgIf( elem_top!=stk::topology::INVALID_TOPOLOGY && local_edge_id >= elem_top.num_edges(), - "For elem " << mesh.identifier(elem) << ", local_edge_id " << local_edge_id << ", " << + "For elem " << mesh.identifier(elem) << " ("<(s)); m_field_meta_data.swap(sField->m_field_meta_data); @@ -517,15 +518,21 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) std::swap(m_modifiedOnHost, sField->m_modifiedOnHost); std::swap(m_modifiedOnDevice, sField->m_modifiedOnDevice); } - - for(int s = 0; s < numStates; ++s) { - NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); - if (ngpField != nullptr) { - ngpField->update_bucket_pointer_view(); - ngpField->fence(); + Kokkos::Profiling::popRegion(); + + if (!(rotateNgpFieldViews && allStatesHaveNgpFields)) { + Kokkos::Profiling::pushRegion("ngpField update_bucket_pointer_view"); + for(int s = 0; s < numStates; ++s) { + NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); + if (ngpField != nullptr) { + ngpField->update_bucket_pointer_view(); + ngpField->fence(); + } } + Kokkos::Profiling::popRegion(); } + Kokkos::Profiling::pushRegion("ngpField swap_field_views"); if (rotateNgpFieldViews && allStatesHaveNgpFields) { for (int s = 1; s < numStates; ++s) { NgpFieldBase* ngpField_sminus1 = field_state(static_cast(s-1))->get_ngp_field(); @@ -533,12 +540,13 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) ngpField_s->swap_field_views(ngpField_sminus1); } } + Kokkos::Profiling::popRegion(); } } void FieldBase::modify_on_host() const -{ +{ STK_ThrowRequireMsg(m_modifiedOnDevice == false, "Modify on host called for Field: " << name() << " but it has an uncleared modified_on_device"); @@ -556,7 +564,7 @@ FieldBase::modify_on_device() const void FieldBase::modify_on_host(const Selector& s) const -{ +{ modify_on_host(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp index 99c56712f671..59b907ca36c1 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp @@ -62,7 +62,7 @@ class BulkData; class MetaData; class UnitTestFieldImpl; class FieldBase; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { class FieldRepository; @@ -340,8 +340,8 @@ class FieldBase friend NgpFieldBase* impl::get_ngp_field(const FieldBase & stkField); friend void impl::set_ngp_field(const FieldBase & stkField, NgpFieldBase * ngpField); - template class NgpDebugger> friend class HostField; - template class NgpDebugger> friend class DeviceField; + template class NgpDebugger> friend class HostField; + template class NgpDebugger> friend class DeviceField; template friend class Field; protected: diff --git a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp index 920e47fde7d0..fb219d103710 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp @@ -41,13 +41,16 @@ namespace stk { namespace mesh { -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(execSpace); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -59,16 +62,19 @@ NgpField & get_updated_ngp_field_async(const FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(std::forward(execSpace)); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -80,13 +86,17 @@ NgpField & get_updated_ngp_field_async(const FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field(const FieldBase & stkField) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field(const FieldBase & stkField) { - auto& ngpFieldRef = get_updated_ngp_field_async(stkField, Kokkos::DefaultExecutionSpace()); + using ExecSpace = Kokkos::DefaultExecutionSpace; + static_assert(Kokkos::SpaceAccessibility::accessible); + + auto& ngpFieldRef = get_updated_ngp_field_async(stkField, ExecSpace()); ngpFieldRef.fence(); return ngpFieldRef; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp index fe70d29c9009..279b38e9f6dc 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp @@ -54,13 +54,14 @@ namespace stk { namespace mesh { -template class NgpDebugger> +template class NgpDebugger> class HostField : public NgpFieldBase { public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; HostField() : NgpFieldBase(), @@ -79,10 +80,10 @@ class HostField : public NgpFieldBase field->template make_field_sync_debugger(); } - HostField(const HostField&) = default; - HostField(HostField&&) = default; - HostField& operator=(const HostField&) = default; - HostField& operator=(HostField&&) = default; + HostField(const HostField&) = default; + HostField(HostField&&) = default; + HostField& operator=(const HostField&) = default; + HostField& operator=(HostField&&) = default; void update_field(const ExecSpace& newExecSpace) override { @@ -96,7 +97,7 @@ class HostField : public NgpFieldBase update_field(); } - void set_field_states(HostField* fields[]) {} + void set_field_states(HostField* fields[]) {} size_t num_syncs_to_host() const override { return field->num_syncs_to_host(); } size_t num_syncs_to_device() const override { return field->num_syncs_to_device(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp index 36823b3ede6f..04eba0b2be61 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp @@ -302,6 +302,14 @@ class HostMeshT : public NgpMeshBase return false; } + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); + } + private: stk::mesh::BulkData *bulk; size_t m_syncCountWhenUpdated; diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp index 67ac6017ab89..d1a0173ee887 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp @@ -48,6 +48,7 @@ #include "stk_mesh/base/Part.hpp" // for Part, etc #include "stk_mesh/base/Selector.hpp" // for Selector #include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank, etc +#include "stk_mesh/base/StkFieldSyncDebugger.hpp" #include "stk_mesh/baseImpl/PartRepository.hpp" // for PartRepository #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, etc @@ -145,20 +146,21 @@ void MetaData::require_valid_entity_rank( EntityRank rank ) const //---------------------------------------------------------------------- MetaData::MetaData(size_t spatial_dimension, const std::vector& entity_rank_names) - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { const size_t numRanks = stk::topology::NUM_RANKS; STK_ThrowRequireMsg(entity_rank_names.size() <= numRanks, "MetaData: number of entity-ranks (" << entity_rank_names.size() << ") exceeds limit of stk::topology::NUM_RANKS (" << numRanks <<")"); @@ -172,20 +174,21 @@ MetaData::MetaData(size_t spatial_dimension, const std::vector& ent } MetaData::MetaData() - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { // Declare the predefined parts @@ -444,59 +447,51 @@ void MetaData::internal_declare_part_subset( Part & superset , Part & subset, bo //---------------------------------------------------------------------- -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Part & arg_part , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Part& part, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - require_same_mesh_meta_data( MetaData::get(arg_part) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_part, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + require_same_mesh_meta_data(MetaData::get(part)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + part, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + FieldSyncDebugger::declare_field_restriction(field, part, numScalarsPerEntity, firstDimension); } -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Selector & arg_selector , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Selector& selector, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_selector, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + selector, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + FieldSyncDebugger::declare_field_restriction(field, selector, numScalarsPerEntity, firstDimension); } //---------------------------------------------------------------------- @@ -754,6 +749,13 @@ std::vector MetaData::get_part_aliases(const Part& part) const return std::vector(); } + +void MetaData::declare_field_sync_debugger_field(stk::mesh::FieldBase& field) +{ + FieldSyncDebugger::declare_field(field); +} + + //---------------------------------------------------------------------- //---------------------------------------------------------------------- // Verify parallel consistency of fields and parts @@ -1216,7 +1218,7 @@ get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair< } -stk::topology get_topology( shards::CellTopology shards_topology, unsigned spatial_dimension) +stk::topology get_topology( shards::CellTopology shards_topology, unsigned spatial_dimension, bool useAllFaceSideShell) { stk::topology t; @@ -1269,8 +1271,7 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SPRING_3; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<3> >()) ) { - t = stk::topology::SHELL_TRI_3; - // t = stk::topology::SHELL_TRI_3_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_3_ALL_FACE_SIDES : stk::topology::SHELL_TRI_3; } //NOTE: shards does not define a shell triangle 4 @@ -1278,21 +1279,17 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SHELL_TRI_4; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<6> >()) ) { - t = stk::topology::SHELL_TRI_6; - // t = stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_6_ALL_FACE_SIDES : stk::topology::SHELL_TRI_6; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<4> >()) ) { - t = stk::topology::SHELL_QUAD_4; - // t = stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_4; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<8> >()) ) { - t = stk::topology::SHELL_QUAD_8; - // t = stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_8; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<9> >()) ) { - t = stk::topology::SHELL_QUAD_9; - // t = stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_9; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::Tetrahedron<4> >()) ) diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp index 0585b10fbd52..8b1e13486f5f 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp @@ -108,7 +108,7 @@ inline void set_topology(Part & part) stk::topology get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair& supersets); /** get the stk::topology given a Shards Cell Topology */ -stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3); +stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3, bool useAllFaceSideShell = false); /** Get the Shards Cell Topology given a stk::topology */ shards::CellTopology get_cell_topology(stk::topology topo); @@ -590,14 +590,28 @@ class MetaData { bool delete_part_alias_case_insensitive(Part& part, const std::string& alias); std::vector get_part_aliases(const Part& part) const; + // To enable the Field Sync Debugger in a production run, add the STK_DEBUG_FIELD_SYNC + // define to your build. This function is solely used to flip external parts of the + // debugger on for unit testing when it is not enabled globally. + // + void enable_field_sync_debugger() { + m_isFieldSyncDebuggerEnabled = true; + } + + bool is_field_sync_debugger_enabled() { +#ifdef STK_DEBUG_FIELD_SYNC + return true; +#else + return m_isFieldSyncDebuggerEnabled; +#endif + } + protected: Part & declare_internal_part( const std::string & p_name); /** \} */ private: - // Functions - MetaData( const MetaData & ); ///< \brief Not allowed MetaData & operator = ( const MetaData & ); ///< \brief Not allowed @@ -611,11 +625,9 @@ class MetaData { void assign_topology(Part& part, stk::topology stkTopo); - // Members + void declare_field_sync_debugger_field(stk::mesh::FieldBase& field); BulkData* m_bulk_data; - bool m_commit ; - bool m_are_late_fields_enabled; impl::PartRepository m_part_repo ; CSet m_attributes ; @@ -641,6 +653,10 @@ class MetaData { std::map > m_partAlias; std::map> m_partReverseAlias; + bool m_commit; + bool m_are_late_fields_enabled; + bool m_isFieldSyncDebuggerEnabled; + /** \name Invariants/preconditions for MetaData. * \{ */ @@ -878,6 +894,8 @@ MetaData::declare_field(stk::topology::rank_t arg_entity_rank, f[0]->set_mesh(m_bulk_data); + declare_field_sync_debugger_field(*f[0]); + return *f[0]; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp index 063f2a2616cd..3821c5932c40 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp @@ -51,34 +51,45 @@ template class DeviceMeshT; class StkFieldSyncDebugger; class EmptyStkFieldSyncDebugger; -template class NgpFieldSyncDebugger; -template class EmptyNgpFieldSyncDebugger; +template class NgpFieldSyncDebugger; +template class EmptyNgpFieldSyncDebugger; #ifdef STK_DEBUG_FIELD_SYNC using DefaultStkFieldSyncDebugger = StkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; #else using DefaultStkFieldSyncDebugger = EmptyStkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; #endif -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class HostField; -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class DeviceField; - #ifdef STK_USE_DEVICE_MESH using NgpMeshDefaultMemSpace = stk::ngp::MemSpace; +#else + using NgpMeshDefaultMemSpace = stk::ngp::HostMemSpace; +#endif + +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class HostField; +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class DeviceField; + +#ifdef STK_USE_DEVICE_MESH template using NgpMeshT = stk::mesh::DeviceMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::DeviceField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::DeviceField; + #else - using NgpMeshDefaultMemSpace = typename stk::ngp::HostExecSpace::memory_space; template using NgpMeshT = stk::mesh::HostMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::HostField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::HostField; #endif } diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp index 3f42f77c62b9..5a5ce4e6de72 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp @@ -52,7 +52,7 @@ namespace stk { namespace mesh { //============================================================================== -template +template class EmptyNgpFieldSyncDebugger { public: @@ -111,7 +111,7 @@ class EmptyNgpFieldSyncDebugger }; //============================================================================== -template +template class NgpFieldSyncDebugger { public: @@ -293,8 +293,8 @@ class NgpFieldSyncDebugger stk::mesh::Selector fieldSelector(*(ngpField->hostField)); UnsignedViewType & localDeviceNumComponentsPerEntity = ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; LastFieldModLocationType & localLastFieldModLocation = lastFieldModLocation; ScalarUvmType & localLostDeviceFieldData = lostDeviceFieldData; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; @@ -349,8 +349,8 @@ class NgpFieldSyncDebugger const stk::mesh::BulkData & bulk = *ngpField->hostBulk; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(bulk); UnsignedViewType & localDeviceNumComponentsPerEntity = ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; stk::mesh::for_each_entity_run(ngpMesh, ngpField->rank, modifiedSelector, @@ -371,8 +371,8 @@ class NgpFieldSyncDebugger const stk::mesh::FieldBase & stkField = *ngpField->hostField; if (buckets.size() != 0) { - lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), - ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); + lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), + ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); lastFieldModLocation = LastFieldModLocationType(stkField.name()+"_lastModLocation", buckets.size(), ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); } @@ -490,7 +490,7 @@ class NgpFieldSyncDebugger ScalarUvmType lostDeviceFieldData; ScalarUvmType anyPotentialDeviceFieldModification; LastFieldModLocationType lastFieldModLocation; - FieldDataDeviceViewType lastFieldValue; + FieldDataDeviceViewType lastFieldValue; typename UnsignedViewType::HostMirror debugHostSelectedBucketOffset; UnsignedViewType debugDeviceSelectedBucketOffset; }; diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp index 85099a167005..b0465dd85342 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp @@ -75,14 +75,18 @@ using HostMeshIndexType = MeshIndexType::HostMirror; using BucketEntityOffsetsViewType = Kokkos::View; template using BucketEntityOffsetsViewTypeT = Kokkos::View; -template using FieldDataDeviceViewType = Kokkos::View; -template using FieldDataHostViewType = Kokkos::View; +template +using FieldDataDeviceViewType = Kokkos::View; +template +using FieldDataHostViewType = Kokkos::View; using FieldDataPointerHostViewType = Kokkos::View; using FieldDataPointerDeviceViewType = Kokkos::View; -template using UnmanagedHostInnerView = Kokkos::View>; -template using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedHostInnerView = Kokkos::View>; #ifdef STK_USE_DEVICE_MESH #define ORDER_INDICES(i,j) j,i diff --git a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp index 3930bae52dec..341264f6ca51 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp @@ -473,11 +473,11 @@ void SideSetHelper::add_sideset_entry_for_element_selected_by_sidesets(Entity en { if(mesh.bucket_ptr(entity) == nullptr) { return; } - const unsigned numSides = mesh.num_sides(entity); + const unsigned numSides = stk::mesh::num_sides(mesh, entity); if(sidesetsAndSelectors.size() > 0 && mesh.entity_rank(entity) == stk::topology::ELEM_RANK && numSides > 0) { - const stk::mesh::ConnectivityOrdinal* ordinals = mesh.begin_ordinals(entity, mesh.mesh_meta_data().side_rank()); - const stk::mesh::Entity* sides = mesh.begin(entity, mesh.mesh_meta_data().side_rank()); + const std::vector ordinals = stk::mesh::get_side_ordinals(mesh, entity); + const stk::mesh::EntityVector sides = stk::mesh::get_sides(mesh, entity); stk::mesh::SideSetEntry entry(entity); diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp index 12e5f4e0d4a4..b4a3875e5073 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp @@ -38,10 +38,61 @@ #include "MetaData.hpp" #include "FieldRestriction.hpp" #include "stk_mesh/baseImpl/BucketRepository.hpp" +#include "stk_util/util/string_utils.hpp" namespace stk { namespace mesh { +const static std::string s_lastFieldModLocationPrefix = "DEBUG_lastFieldModLocation_"; + +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + meta.declare_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name(), + field.number_of_states()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, part, numScalarsPerEntity, firstDimension, + initValue.data()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, selector, numScalarsPerEntity, firstDimension, + initValue.data()); + } + } +} + +} + StkFieldSyncDebugger::StkFieldSyncDebugger(const FieldBase* stkField) : m_stkField(stkField), m_isDataInitialized(false) @@ -133,7 +184,8 @@ StkFieldSyncDebugger::fill_last_mod_location_field_from_device() for (unsigned ordinal = 0; ordinal < bucket->size(); ++ordinal) { const Entity & entity = (*bucket)[ordinal]; const unsigned numComponents = field_scalars_per_entity(lastModLocationField, entity); - uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, entity)); + uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, + entity)); for (unsigned component = 0; component < numComponents; ++component) { const unsigned bucketOffset = ngpField.debug_get_bucket_offset(bucket->bucket_id()); lastModLocation[component] = m_debugFieldLastModification(bucketOffset, ORDER_INDICES(ordinal, component)); @@ -147,30 +199,10 @@ StkFieldSyncDebugger::get_last_mod_location_field() const { if (m_lastModLocationField == nullptr) { STK_ThrowRequire(impl::get_ngp_field(*m_stkField) != nullptr); - BulkData & bulk = m_stkField->get_mesh(); - MetaData & meta = bulk.mesh_meta_data(); - meta.enable_late_fields(); - FieldState state = m_stkField->state(); - FieldBase* fieldWithStateNew = m_stkField->field_state(stk::mesh::StateNew); - Field & lastModLocationField = - meta.declare_field(m_stkField->entity_rank(), - "DEBUG_lastFieldModLocation_"+fieldWithStateNew->name(), - m_stkField->number_of_states()); - - meta.set_mesh_on_fields(&bulk); - const FieldBase::RestrictionVector & fieldRestrictions = m_stkField->restrictions(); - if (not fieldRestrictions.empty()) { - for (const FieldBase::Restriction & restriction : fieldRestrictions) { - const unsigned numComponents = restriction.num_scalars_per_entity(); - std::vector initLastModLocation(numComponents, LastModLocation::HOST_OR_DEVICE); - put_field_on_mesh(lastModLocationField, restriction.selector(), numComponents, initLastModLocation.data()); - } - } - else { - bulk.reallocate_field_data(lastModLocationField); - } - - m_lastModLocationField = lastModLocationField.field_state(state); + MetaData& meta = m_stkField->mesh_meta_data(); + m_lastModLocationField = meta.get_field(m_stkField->entity_rank(), + s_lastFieldModLocationPrefix + m_stkField->name()); + STK_ThrowRequire(m_lastModLocationField != nullptr); } return *m_lastModLocationField; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp index b9c2be4e2189..184d77d1be88 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp @@ -48,6 +48,16 @@ namespace mesh { class Bucket; class FieldBase; +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field); +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension); +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension); + +} + class EmptyStkFieldSyncDebugger { public: diff --git a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp index 4457df3f1796..ebf41a176298 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp @@ -116,6 +116,7 @@ struct MeshIndex Bucket* bucket; unsigned bucket_ordinal; + STK_FUNCTION MeshIndex(Bucket *bucketIn, size_t ordinal) : bucket(bucketIn), bucket_ordinal(ordinal) {} }; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp index f5fa92071b4a..a703517b4c6f 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp @@ -65,7 +65,7 @@ class BucketConnDynamic m_ordinals(), m_permutations(), m_numUnusedEntries(0), - m_compressionThreshold(0.5) + m_compressionThreshold(2) { STK_ThrowRequireMsg(bucketCapacity > 0, "BucketConnDynamic must have bucketCapacity strictly greater than 0"); } @@ -258,7 +258,7 @@ class BucketConnDynamic size_t total_num_connectivity() const { return m_connectivity.size() - m_numUnusedEntries; } size_t num_unused_entries() const { return m_numUnusedEntries; } - void compress_connectivity(unsigned suggestedCapacity = 0) + void compress_connectivity() { if (m_numUnusedEntries == 0) { return; @@ -356,9 +356,10 @@ class BucketConnDynamic Permutation perm = INVALID_PERMUTATION) { static constexpr unsigned minSizeHeuristic = 256; - if (total_num_connectivity() > minSizeHeuristic && (static_cast(m_numUnusedEntries)/total_num_connectivity()) > m_compressionThreshold) + if ((total_num_connectivity() > minSizeHeuristic) && + (total_num_connectivity() < m_numUnusedEntries*m_compressionThreshold)) { - compress_connectivity(total_num_connectivity()+m_numUnusedEntries/2); + compress_connectivity(); } grow_if_necessary(bktOrdinal); @@ -501,7 +502,7 @@ class BucketConnDynamic std::vector m_ordinals; std::vector m_permutations; unsigned m_numUnusedEntries; - double m_compressionThreshold; + int m_compressionThreshold; }; } // namespace impl diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp index 9f58a66dd2fc..afde826b7583 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp @@ -37,10 +37,10 @@ #include // for operator new #include // for operator<<, etc #include // for runtime_error -#include // for Bucket, raw_part_equal +#include // for Bucket #include // for BulkData, etc #include -#include // for Partition, lower_bound +#include // for Partition, upper_bound #include #include #include "stk_mesh/base/BucketConnectivity.hpp" // for BucketConnectivity @@ -162,85 +162,31 @@ void BucketRepository::ensure_data_structures_sized() } } -//// -//// Note that we need to construct a key vector that the particular -//// format so we can use the lower_bound(..) function to lookup the -//// partition. Because we are using partitions now instead of -//// buckets, it should be possible to do without that vector and -//// instead do the lookup directly from the OrdinalVector. -//// - Partition *BucketRepository::get_or_create_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts) { - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - std::vector::iterator ik; - Partition* partition = get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + Partition* partition = get_partition(arg_entity_rank, parts, ik); if(partition == nullptr) { - partition = create_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + partition = create_partition(arg_entity_rank, parts, ik); } return partition; } -void BucketRepository::fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec) -{ - const size_t part_count = parts.size(); - - const size_t keyLen = 1 + part_count; - - *keyPtr = keyTmpBuffer; - *keyEnd = *keyPtr+keyLen; - - if (keyLen >= maxKeyTmpBufferSize) { - keyTmpVec.resize(keyLen); - *keyPtr = keyTmpVec.data(); - *keyEnd = *keyPtr+keyLen; - } - - //---------------------------------- - // Key layout: - // { part_count , { part_ordinals } } - // - (*keyPtr)[0] = part_count; - - for ( unsigned i = 0 ; i < part_count ; ++i ) { - (*keyPtr)[i+1] = parts[i]; - } -} - Partition *BucketRepository::get_partition(const EntityRank arg_entity_rank, const OrdinalVector &parts) { - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; std::vector::iterator ik; - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - - return get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + return get_partition(arg_entity_rank, parts, ik); } Partition *BucketRepository::get_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { STK_ThrowAssertMsg(m_mesh.mesh_meta_data().check_rank(arg_entity_rank), "Entity rank " << arg_entity_rank << " is invalid"); @@ -249,12 +195,12 @@ Partition *BucketRepository::get_partition( std::vector & partitions = m_partitions[ arg_entity_rank ]; - ik = lower_bound( partitions , keyPtr ); - const bool partition_exists = (ik != partitions.end()) && raw_part_equal( (*ik)->key() , keyPtr ); + ik = upper_bound( partitions , parts ); + const bool partition_exists = (ik != partitions.begin() && (ik[-1])->get_legacy_partition_id() == parts ); if (partition_exists) { - return *ik; + return ik[-1]; } return nullptr; @@ -263,11 +209,9 @@ Partition *BucketRepository::get_partition( Partition* BucketRepository::create_partition( const EntityRank arg_entity_rank, const OrdinalVector& parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { - Partition *partition = new Partition(m_mesh, this, arg_entity_rank, keyPtr, keyEnd); + Partition *partition = new Partition(m_mesh, this, arg_entity_rank, parts.data(), parts.data()+parts.size()); STK_ThrowRequire(partition != nullptr); m_need_sync_from_partitions[arg_entity_rank] = true; @@ -411,8 +355,7 @@ Bucket *BucketRepository::allocate_bucket(EntityRank entityRank, unsigned initialCapacity, unsigned maximumCapacity) { - std::vector tmp(key.begin()+1,key.end()); - STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(tmp,std::less()), + STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(key,std::less()), "bucket created with 'key' vector that's not sorted and unique"); BucketVector &bucket_vec = m_buckets[entityRank]; const unsigned bucket_id = bucket_vec.size(); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp index 27366e248101..801eb0076cf8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp @@ -120,15 +120,11 @@ class BucketRepository Partition *get_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); Partition *create_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); // For use by BulkData::internal_modification_end(). void internal_modification_end(); @@ -168,14 +164,9 @@ class BucketRepository void ensure_data_structures_sized(); - void fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec); + BulkData & m_mesh ; - - BulkData & m_mesh ; // Associated Bulk Data Aggregate - - // Vector of bucket pointers by rank. This is now a cache and no longer the primary - // location of Buckets when USE_STK_MESH_IMPL_PARTITION is #defined. + // Vector of bucket pointers for each rank. std::vector< BucketVector > m_buckets ; std::vector > m_partitions; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp index a1a0cbfd8afe..51feaff89523 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp @@ -640,11 +640,11 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, OrdinalVector entity_node_ordinals(entity_top.num_nodes()); elem_top.sub_topology_node_ordinals(mesh.entity_rank(entity), relationOrdinal, entity_node_ordinals.data()); - const stk::mesh::Entity *elem_nodes = mesh.begin_nodes(elem); + stk::mesh::EntityVector elem_nodes(mesh.begin_nodes(elem),mesh.end_nodes(elem)); EntityVector entity_top_nodes(entity_top.num_nodes()); - elem_top.sub_topology_nodes(elem_nodes, mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); + elem_top.sub_topology_nodes(elem_nodes.data(), mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); - Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes, entity_top, entity_top_nodes.data(), relationOrdinal); + Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes.data(), entity_top, entity_top_nodes.data(), relationOrdinal); OrdinalVector scratch1, scratch2, scratch3; @@ -676,10 +676,9 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, if(0 == num_side_nodes) { Permutation node_perm = stk::mesh::Permutation::INVALID_PERMUTATION; - Entity const *elem_nodes_local = mesh.begin_nodes(elem); for(unsigned i = 0; i < entity_top.num_nodes(); ++i) { - Entity node = elem_nodes_local[entity_node_ordinals[i]]; + Entity node = elem_nodes[entity_node_ordinals[i]]; mesh.declare_relation(entity, node, i, node_perm, scratch1, scratch2, scratch3); } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp index 2566e59bb9ee..a1fa3b4eecd8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp @@ -55,8 +55,13 @@ bool MeshModification::modification_begin(const std::string description) const stk::mesh::FieldVector allFields = m_bulkData.mesh_meta_data().get_fields(); for (FieldBase * stkField : allFields) { stkField->sync_to_host(); - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_begin(); + } + + if (m_bulkData.mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : allFields) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_begin(); + } } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp index 7af403e25567..8f3f1d0f49ea 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp @@ -53,11 +53,6 @@ template struct NgpMeshHostData : NgpMeshHostDataBase { typename EntityKeyViewTypeT::HostMirror hostEntityKeys; - typename BucketEntityOffsetsViewTypeT::HostMirror hostBucketEntityOffsets[stk::topology::NUM_RANKS]; - typename UnsignedViewTypeT::HostMirror hostEntityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename EntityViewTypeT::HostMirror hostSparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename OrdinalViewTypeT::HostMirror hostSparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename PermutationViewTypeT::HostMirror hostSparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; typename UnsignedViewTypeT::HostMirror hostVolatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; typename NgpCommMapIndicesT::HostMirror hostVolatileFastSharedCommMap[stk::topology::NUM_RANKS]; unsigned volatileFastSharedCommMapSyncCount = 0; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp index cc95d6223656..8d9a0387d908 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp @@ -326,7 +326,7 @@ stk::mesh::FieldVector get_fields_for_bucket(const stk::mesh::BulkData& mesh, void Partition::sort(const EntitySorterBase& sorter) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); std::vector entities(m_size); @@ -540,7 +540,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() clear_pending_removes_by_filling_from_end(); if (no_buckets()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); Bucket *bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); @@ -554,7 +554,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() if (bucket->size() == bucket->capacity()) { if (bucket->size() == m_repository->get_maximum_bucket_capacity()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp index a10a754fb2a2..97916dc9525e 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp @@ -209,35 +209,26 @@ class Partition std::ostream &operator<<(std::ostream &, const stk::mesh::impl::Partition &); -inline -bool partition_key_less( const unsigned * lhs , const unsigned * rhs ) -{ -// The following (very old) code is clever... So I'm adding some comments. -// -// A partition key is an array of unsigned, laid out like this: -// key[num-part-ordinals, first-part-ordinal, ..., last-part-ordinal] - - if (*lhs == *rhs) { //num-part-ordinals is equal for lhs and rhs... - const unsigned * const last_lhs = lhs + *lhs; - do { - ++lhs ; ++rhs ; - } while ( last_lhs != lhs && *lhs == *rhs ); - } - return *lhs < *rhs; -} - struct PartitionLess { - bool operator()( const Partition * lhs_Partition , const unsigned * rhs ) const - { return partition_key_less( lhs_Partition->key() , rhs ); } + bool operator()( const Partition * lhs_Partition , const OrdinalVector& rhs ) const + { + return lhs_Partition->get_legacy_partition_id().size() != rhs.size() ? + lhs_Partition->get_legacy_partition_id().size() < rhs.size() : + lhs_Partition->get_legacy_partition_id() < rhs; + } - bool operator()( const unsigned * lhs , const Partition * rhs_Partition ) const - { return partition_key_less( lhs , rhs_Partition->key() ); } + bool operator()( const OrdinalVector& lhs , const Partition * rhs_Partition ) const + { + return lhs.size() != rhs_Partition->get_legacy_partition_id().size() ? + lhs.size() < rhs_Partition->get_legacy_partition_id().size() : + lhs < rhs_Partition->get_legacy_partition_id(); + } }; inline std::vector::iterator -lower_bound( std::vector & v , const unsigned * key ) -{ return std::lower_bound( v.begin() , v.end() , key , PartitionLess() ); } +upper_bound( std::vector & v , const OrdinalVector& key ) +{ return std::upper_bound( v.begin() , v.end() , key , PartitionLess() ); } } // impl } // mesh diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp index 4f19954f4e60..6953533e6049 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp @@ -10,6 +10,10 @@ namespace impl { class AbstractCDTInterface { + public: + virtual ~AbstractCDTInterface() = default; + + private: virtual void triangulate(const utils::impl::Projection& proj) = 0; }; diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp index bddbe88eb692..de5dea0daa9a 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp @@ -288,20 +288,20 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi constexpr unsigned maxNumEdgeNodes = 3; std::vector edgeNodes(maxNumEdgeNodes); std::vector edgeVerts(maxNumEdgeNodes); - + const std::vector& surfaceElems = mesh->get_elements(); for(const mesh::MeshEntityPtr& elem : surfaceElems) { if (elem) { const stk::mesh::SideSetEntry& ssetEntry = (*stkElsField)(elem, 0, 0); stk::mesh::Entity stkEl = ssetEntry.element; - + const bool stkElemIsFace = ssetEntry.side != stk::mesh::INVALID_CONNECTIVITY_ORDINAL; if (stkElemIsFace) { stkEl = stk::mesh::get_side_entity_for_elem_side_pair(bulk, stkEl, ssetEntry.side); } - + stk::topology stkTopo = bulk.bucket(stkEl).topology(); - + const stk::mesh::Entity* nodes = bulk.begin_nodes(stkEl); for(int dn=0; dncount_down(); ++dn) { @@ -309,7 +309,7 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi STK_ThrowRequire((edgeEnt && edgeEnt->get_type() == mesh::MeshEntityType::Edge)); edgeNodes.resize(edgeEnt->count_down()); stkTopo.edge_nodes(nodes, dn, edgeNodes.data()); - + edgeVerts.resize(edgeEnt->count_down()); for(int n=0; ncount_down(); ++n) { diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp index f66daa9cacae..075445307309 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp @@ -28,7 +28,7 @@ stk::mesh::Field* StkFieldCopier::create_stk_field(mesh::FieldPtrmesh_meta_data_ptr(); stk::mesh::Field* stkField = &(metaData->declare_field(stk::topology::NODE_RANK, name)); - stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), + stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), middleMeshField->get_field_shape().get_num_nodes(0), 0); return stkField; @@ -38,7 +38,8 @@ void StkFieldCopier::copy(const stk::mesh::Field& stkField, mesh::FieldP { check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -62,7 +63,8 @@ void StkFieldCopier::copy(const mesh::FieldPtr middleMeshFieldPtr, stk:: { check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -109,7 +111,7 @@ void StkFieldCopier::check_field_shapes(const stk::mesh::Field& stkField std::string("Field shapes not compatible: stk field has ") + std::to_string(stk_field_dims.second) + " components per node, while the middle mesh field has " + std::to_string(meshField->get_num_comp()) ); - } + } } std::pair StkFieldCopier::get_field_shape_and_num_components(const stk::mesh::Field& stkField) diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp index 617d1d095b46..f318776a5239 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp @@ -5,6 +5,12 @@ #include "stk_util/ngp/NgpSpaces.hpp" #include "NgpTestDeviceMacros.hpp" +// RDC is required for HIP build since registering a static global variable +// on an inline variable is not functional as of rocm 6.2.7 +#if defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) +#error "Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE is required for HIP build" +#endif + namespace ngp_testing { template diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp index 0aac6f0aa609..605d54f699dc 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp @@ -6,10 +6,4 @@ #define NGP_TEST_FUNCTION KOKKOS_FUNCTION #define NGP_TEST_INLINE KOKKOS_INLINE_FUNCTION -#ifdef STK_ENABLE_GPU -#define NGP_TEST_DEVICE_ONLY __device__ -#else -#define NGP_TEST_DEVICE_ONLY -#endif - #endif diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp index 0f25e3439677..744c2666c36e 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp @@ -63,12 +63,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NUM_TO_STR(x) NGP_TEST_STRINGIZE(x) #define LOCATION __FILE__ ":" NUM_TO_STR(__LINE__) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_TRUE(cond) -#define NGP_ASSERT_TRUE(cond) - -#else #define NGP_EXPECT_TRUE(cond) \ do { \ if (!(cond)) { \ @@ -83,7 +77,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif #define NGP_EXPECT_FALSE(cond) NGP_EXPECT_TRUE(!(cond)) #define NGP_ASSERT_FALSE(cond) NGP_ASSERT_TRUE(!(cond)) @@ -106,12 +99,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NGP_EXPECT_GE(a, b) NGP_EXPECT_TRUE((a) >= (b)) #define NGP_ASSERT_GE(a, b) NGP_ASSERT_TRUE((a) >= (b)) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_NEAR(a, b, tolerance) -#define NGP_ASSERT_NEAR(a, b, tolerance) - -#else #define NGP_EXPECT_NEAR(a, b, tolerance) \ do { \ if (!::ngp_testing::internal::expect_near(a, b, tolerance)) { \ @@ -126,7 +113,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif namespace ngp_testing { diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp index 42dbab9fc055..05a429c2a069 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp @@ -74,9 +74,14 @@ class NgpMeshChangeElementPartMembership : public stk::unit_test_util::MeshFixtu void batch_change_element_part_membership(int cycle) { + Kokkos::Profiling::pushRegion("BulkData::batch_change_entity_parts"); get_bulk().batch_change_entity_parts(stk::mesh::EntityVector{get_element(cycle)}, stk::mesh::PartVector{get_part()}, {}); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); stk::mesh::get_updated_ngp_mesh(get_bulk()); + Kokkos::Profiling::popRegion(); } private: @@ -232,9 +237,14 @@ TEST_F( NgpMeshChangeElementPartMembership, TimingBatch ) batchTimer.start_batch_timer(); setup_host_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + Kokkos::Profiling::pushRegion("batch_change_element_part_membership"); + for (int i = 0; i < NUM_ITERS; i++) { batch_change_element_part_membership(i); } + + Kokkos::Profiling::popRegion(); + batchTimer.stop_batch_timer(); reset_mesh(); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp new file mode 100644 index 000000000000..44a7a65b42ce --- /dev/null +++ b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp @@ -0,0 +1,136 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkNgpField, multiStateRotation) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + if (stk::parallel_machine_size(comm) > 1) { GTEST_SKIP(); } + + const unsigned NUM_RUNS = 5; + const unsigned NUM_ITERS = 3000; + std::string meshSpec = "generated:80x80x80"; + + std::cout << "Using mesh-spec: " << meshSpec << std::endl; + + stk::unit_test_util::BatchTimer batchTimer(comm); + + batchTimer.initialize_batch_timer(); + + std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(comm) + .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) + .set_spatial_dimension(3) + .create(); + + stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); + const int numFieldStates = 3; + stk::mesh::Field& tensorField1 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField1", numFieldStates); + stk::mesh::Field& tensorField2 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField2", numFieldStates); + stk::mesh::Field& vectorField1 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField1", numFieldStates); + stk::mesh::Field& vectorField2 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField2", numFieldStates); + stk::mesh::put_field_on_mesh(tensorField1, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(tensorField2, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(vectorField1, meta.universal_part(), 3, nullptr); + stk::mesh::put_field_on_mesh(vectorField2, meta.universal_part(), 3, nullptr); + + stk::io::fill_mesh(meshSpec, *bulkPtr); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(*bulkPtr); + EXPECT_FALSE(ngpMesh.need_sync_to_host()); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("initialize fields"); + stk::ngp::ExecSpace execSpace; + constexpr double initValue1 = 1.14; + constexpr double initValue2 = 3.14; + for(int s=0; s(s); + stk::mesh::Field& tensorField1_state = tensorField1.field_of_state(state); + stk::mesh::Field& tensorField2_state = tensorField2.field_of_state(state); + stk::mesh::Field& vectorField1_state = vectorField1.field_of_state(state); + stk::mesh::Field& vectorField2_state = vectorField2.field_of_state(state); + stk::mesh::field_fill(initValue1, tensorField1_state, execSpace); + stk::mesh::field_fill(initValue2, tensorField2_state, execSpace); + stk::mesh::field_fill(initValue1, vectorField1_state, execSpace); + stk::mesh::field_fill(initValue2, vectorField2_state, execSpace); + } + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("multiStateRotation test"); + + for (unsigned j = 0; j < NUM_RUNS; j++) { + + batchTimer.start_batch_timer(); + + for(unsigned i=0; iupdate_field_data_states(rotateNgpFieldViews); + Kokkos::Profiling::popRegion(); + } + + batchTimer.stop_batch_timer(); + } + + Kokkos::Profiling::popRegion(); + batchTimer.print_batch_timing(NUM_ITERS); +} + +} diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp index 3cc7847388fd..a4b6c074c663 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp @@ -114,14 +114,18 @@ void run_volume_to_one_test_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, IdentProc(pRank, pRank)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, IdentProc(pRank, pRank)}; - for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { @@ -219,14 +223,19 @@ void run_volume_to_one_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; - + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; + for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; @@ -259,14 +268,19 @@ void run_one_to_volume_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; - for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index ff76f6ad9c5d..0b6adaae9421 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -628,6 +628,8 @@ struct UpdateInteriorNodeBVs KOKKOS_INLINE_FUNCTION void operator()(unsigned argIdx) const; + KOKKOS_INLINE_FUNCTION + void check_tree(unsigned argIdx) const; KOKKOS_FORCEINLINE_FUNCTION void get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_3d_view_amt &boxesMinMax) const; @@ -661,10 +663,71 @@ template void UpdateInteriorNodeBVs::apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { const UpdateInteriorNodeBVs op(tree); - const size_t numLeaves = tree.hm_numLeaves(); + const size_t numLeaves = tree.hm_numLeaves(); auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); - Kokkos::parallel_for(policy, op); + Kokkos::parallel_for("UpdateInteriorNodeBVs", policy, op); + Kokkos::parallel_for("check_tree", policy, KOKKOS_LAMBDA(const unsigned& argIdx){op.check_tree(argIdx);}); +} + +template +KOKKOS_INLINE_FUNCTION +void UpdateInteriorNodeBVs::check_tree(unsigned argIdx) const +{ + if (m_numLeaves > 1) { + LocalOrdinal idx = static_cast(argIdx); + + RealType bvMinMax[6]; + + LocalOrdinal parent = tm_nodeParents(idx); + RealType sibMinMax[6]; + + constexpr RealType tol = std::numeric_limits::epsilon(); + bool fixedBox = false; + + while (idx != parent) { + const LocalOrdinal parentIdx = parent - m_numLeaves; + + const bool boxIsAllZeros = ((m_nodeMinMaxs(parentIdx, 0) < tol) + &&(m_nodeMinMaxs(parentIdx, 1) < tol) + &&(m_nodeMinMaxs(parentIdx, 2) < tol) + &&(m_nodeMinMaxs(parentIdx, 3) < tol) + &&(m_nodeMinMaxs(parentIdx, 4) < tol) + &&(m_nodeMinMaxs(parentIdx, 5) < tol)); + if (boxIsAllZeros || fixedBox) { + const LocalOrdinal sib0 = tm_nodeChildren(parent, 0); + const LocalOrdinal sib1 = tm_nodeChildren(parent, 1); + + if (sib0 < m_numLeaves) { + get_stk_box(bvMinMax, sib0, m_leafMinMaxs); + } + else { + get_box(bvMinMax, sib0-m_numLeaves, m_nodeMinMaxs); + } + + if (sib1 < m_numLeaves) { + get_stk_box(sibMinMax, sib1, m_leafMinMaxs); + } + else { + get_box(sibMinMax, sib1-m_numLeaves, m_nodeMinMaxs); + } + + m_nodeMinMaxs(parentIdx, 0) = AABB_MIN(bvMinMax[0], sibMinMax[0]); + m_nodeMinMaxs(parentIdx, 1) = AABB_MIN(bvMinMax[1], sibMinMax[1]); + m_nodeMinMaxs(parentIdx, 2) = AABB_MIN(bvMinMax[2], sibMinMax[2]); + m_nodeMinMaxs(parentIdx, 3) = AABB_MAX(bvMinMax[3], sibMinMax[3]); + m_nodeMinMaxs(parentIdx, 4) = AABB_MAX(bvMinMax[4], sibMinMax[4]); + m_nodeMinMaxs(parentIdx, 5) = AABB_MAX(bvMinMax[5], sibMinMax[5]); + fixedBox = true; + } + + idx = parent; + parent = tm_nodeParents(parent); + if (idx == parent) { + return; + } + } + } } template diff --git a/packages/stk/stk_topology/stk_topology/topology_defn.hpp b/packages/stk/stk_topology/stk_topology/topology_defn.hpp index 319635eb70bf..e72e05cb7db2 100644 --- a/packages/stk/stk_topology/stk_topology/topology_defn.hpp +++ b/packages/stk/stk_topology/stk_topology/topology_defn.hpp @@ -76,13 +76,7 @@ void topology::sub_topology_node_ordinals(unsigned sub_rank, unsigned sub_ordina { case NODE_RANK: *output_ordinals = sub_ordinal; break; case EDGE_RANK: edge_node_ordinals(sub_ordinal, output_ordinals); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_node_ordinals(sub_ordinal - num_faces(), output_ordinals); - } else { - face_node_ordinals(sub_ordinal, output_ordinals); - } - break; + case FACE_RANK: face_node_ordinals(sub_ordinal, output_ordinals); break; default: break; } } @@ -95,13 +89,7 @@ void topology::sub_topology_nodes(const NodeArray & nodes, unsigned sub_rank, un { case NODE_RANK: *output_nodes = nodes[sub_ordinal]; break; case EDGE_RANK: edge_nodes(nodes, sub_ordinal, output_nodes); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_nodes(nodes, sub_ordinal - num_faces(), output_nodes); - } else { - face_nodes(nodes, sub_ordinal, output_nodes); - } - break; + case FACE_RANK: face_nodes(nodes, sub_ordinal, output_nodes); break; default: break; } } @@ -126,11 +114,7 @@ topology topology::sub_topology(unsigned sub_rank, unsigned sub_ordinal) const { case NODE_RANK: return NODE; case EDGE_RANK: return edge_topology(sub_ordinal); - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - return edge_topology(sub_ordinal - num_faces()); - } - return face_topology(sub_ordinal); + case FACE_RANK: return face_topology(sub_ordinal); default: break; } return INVALID_TOPOLOGY; @@ -140,22 +124,20 @@ template STK_INLINE_FUNCTION void topology::side_node_ordinals(unsigned side_ordinal, OrdinalOutputIterator output_ordinals) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_node_ordinals(EDGE_RANK, side_ordinal-num_faces(), output_ordinals); - } else { - sub_topology_node_ordinals( side_rank(), side_ordinal, output_ordinals); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_node_ordinals(side_rank(side_ordinal), adjusted_ordinal, output_ordinals); } template STK_INLINE_FUNCTION void topology::side_nodes(const NodeArray & nodes, unsigned side_ordinal, NodeOutputIterator output_nodes) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_nodes( nodes, EDGE_RANK, side_ordinal-num_faces(), output_nodes); - } else { - sub_topology_nodes( nodes, side_rank(), side_ordinal, output_nodes); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_nodes(nodes, side_rank(side_ordinal), adjusted_ordinal, output_nodes); } STK_INLINE_FUNCTION @@ -165,7 +147,7 @@ unsigned topology::num_sides() const if (side_rank() != INVALID_RANK) { num_sides_out = side_rank() > NODE_RANK ? num_sub_topology(side_rank()) : num_vertices(); - if (has_mixed_rank_sides()) { + if (has_mixed_rank_sides() && side_rank() > EDGE_RANK) { num_sides_out += num_sub_topology(EDGE_RANK); } } @@ -175,10 +157,10 @@ unsigned topology::num_sides() const STK_INLINE_FUNCTION topology topology::side_topology(unsigned side_ordinal) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) - return shell_side_topology(side_ordinal-num_faces()); + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; - return sub_topology(side_rank(), side_ordinal); + return sub_topology(side_rank(side_ordinal), adjusted_ordinal); } STK_INLINE_FUNCTION diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp index 49ec7291e66f..383d4132448b 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp @@ -50,6 +50,7 @@ class SearchById { using KeyToTargetProcessor = std::vector>; using MeshIDSet = std::set; + virtual ~SearchById() = default; virtual void intialize(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb) =0; virtual void do_search(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb, diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp index 02edd92df1c3..1648da606e66 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp @@ -111,6 +111,7 @@ class TranslatorBase TranslatorBase() {} virtual void translate(const void* srcAddr, unsigned srcDataByteSize, DataTypeKey::data_t destType, void* destAddr, unsigned destDataByteSize) const = 0; + virtual ~TranslatorBase() = default; }; struct TranslatorInfo diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp index 1f52825509ad..977e40aa4fca 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp @@ -45,7 +45,18 @@ struct StkTopologyMapEntry { bool operator!=(const StkTopologyMapEntry &rhs) const { return !(*this == rhs); } - int num_sides() const { return topology.num_sides(); } + int num_face_sides() const { + return 2; // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + + int num_sides() const { + if (topology.is_shell()) { + if (topology.dimension() == 3) { + return num_face_sides(); // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + } + return topology.num_sides(); + } bool valid_side(unsigned side) const { diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp index cd873a952e1c..a53ab4ac9fdb 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp @@ -33,14 +33,12 @@ // #include // for to_string -#include // for Utils #include // for size_t -#include // for ostream +#include #include #include // for Field #include // for MetaData #include -#include // for allocator, operator+, etc #include // for vector #include "gtest/gtest.h" // for AssertHelper #include "stk_mesh/base/Types.hpp" // for PartVector @@ -49,9 +47,10 @@ enum { SpaceDim = 3 }; TEST(UnitTestGmeshFixture, testUnit) { + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 4) { GTEST_SKIP(); } const size_t num_x = 1; const size_t num_y = 2; - const size_t num_z = 3; + const size_t num_z = 4; const size_t num_surf = 6; std::string config_mesh = std::to_string(num_x) + "x" + std::to_string(num_y) + "x" + diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp index c2957d78e73e..0a07a1462b7b 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp @@ -33,6 +33,7 @@ // #include "gtest/gtest.h" // for AssertHelper, ASSERT_TRUE +#include #include // for is_part_io_part #include // for StkMeshIoBroker #include // for BulkData @@ -130,4 +131,40 @@ TEST(StkMeshIoBroker, missingInputField) { unlink(fieldDataFile.c_str()); } +TEST(StkMeshIoBroker, testMissingInputField) { + const std::string fieldDataFile = "testMeshWithMissingFieldData.e"; + std::vector transientTimeSteps = {0.0, 1.0, 2.0}; + std::string transientFieldName = "transient_field"; + + write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); + + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + stk::mesh::MetaData& meta = bulk->mesh_meta_data(); + + const stk::mesh::EntityRank rank = stk::topology::NODE_RANK; + + const std::string fieldNameBad = transientFieldName+"_scalar_bad_field"; + stk::mesh::Field &scalarFieldBad = meta.declare_field(rank, fieldNameBad, 1); + stk::mesh::put_field_on_mesh(scalarFieldBad, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldBad(&scalarFieldBad, fieldNameBad); + + const std::string fieldNameGood = transientFieldName+"_scalar"; + stk::mesh::Field &scalarFieldGood = meta.declare_field(rank, fieldNameGood, 1); + stk::mesh::put_field_on_mesh(scalarFieldGood, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldGood(&scalarFieldGood, fieldNameGood); + + stk::io::StkMeshIoBroker broker(MPI_COMM_WORLD); + + broker.set_bulk_data(*bulk); + broker.add_mesh_database(fieldDataFile, stk::io::READ_MESH); + broker.create_input_mesh(); + + EXPECT_FALSE(stk::io::verify_field_request(broker, meshFieldBad)); + EXPECT_TRUE(stk::io::verify_field_request(broker, meshFieldGood)); + + broker.populate_bulk_data(); + + unlink(fieldDataFile.c_str()); +} + } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp index dd064bd2b7a7..6aa92e3d9685 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp @@ -125,7 +125,7 @@ TEST(UnitTestingOfBucket, testBucket) std::stringstream out1_str; out1_str << (*b1); bool equal = (gold1 == out1_str.str()); - ASSERT_TRUE(equal); + ASSERT_TRUE(equal)<<"expected str="< bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + const std::string meshDesc = + "0,1,SHELL_TRI_3_ALL_FACE_SIDES, 1,2,3, block_1\n\ + 0,2,SHELL_TRI_3_ALL_FACE_SIDES, 2,4,3, block_1"; + + std::vector coords = {0,1,0, 1,0,0, 1,2,0, 2,1,0}; + +//FIXME! text-mesh doesn't recognize the all-face-sides topologies. + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); +} + +void check_ordinal_and_permutation(const stk::mesh::BulkData& bulk, + stk::mesh::Entity elem, + stk::mesh::EntityRank rank, + const stk::mesh::EntityVector& sideNodes, + stk::mesh::ConnectivityOrdinal expectedSideOrdinal, + stk::mesh::Permutation expectedPerm) +{ + stk::mesh::OrdinalAndPermutation ordPerm = + stk::mesh::get_ordinal_and_permutation(bulk, elem, rank, sideNodes); + EXPECT_EQ(expectedSideOrdinal, ordPerm.first); + EXPECT_EQ(expectedPerm, ordPerm.second); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_no_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + + stk::mesh::EntityVector sideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 2), + bulk->get_entity(stk::topology::NODE_RANK, 3) + }; + stk::mesh::ConnectivityOrdinal expectedSideOrdinal = sideOrdinal; + stk::mesh::Permutation expectedPerm = static_cast(0); + std::cout<<"checking elem1/sideNodes"<get_entity(stk::topology::ELEM_RANK, 2); + expectedSideOrdinal = 4; + stk::mesh::EntityVector reversedSideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 3), + bulk->get_entity(stk::topology::NODE_RANK, 2) + }; + expectedPerm = static_cast(0); + std::cout<<"checking elem2/reversedSideNodes"<(1); + std::cout<<"checking elem2/sideNodes"<declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + +//FIXME! +//The following expect should be '2u' but that fails because the side is only +//onnected to 1 element. (Note that this is the 'no-graph' version of this +//test, so the issue is not related to the face-adjacent-elem-graph.) + EXPECT_EQ(1u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_with_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + bulk->initialize_face_adjacent_element_graph(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + EXPECT_EQ(2u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(GetSides, hex8) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + stk::io::fill_mesh("generated:1x1x1|sideset:xXyYzZ", *bulk); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::HEX_8, bulk->bucket(elem1).topology()); + + EXPECT_EQ(6u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(6u, sides.size()); + ASSERT_EQ(6u, sideOrds.size()); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[1])); +} + +TEST(GetSides, textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); //shell-quad-4 mesh: // 6 @@ -1165,7 +1359,18 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_EdgeSides) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); - stk::io::write_mesh("shellq4_edge_sides.g", *bulk); + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::SHELL_QUAD_4, bulk->bucket(elem1).topology()); + + EXPECT_EQ(2u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(2u, sides.size()); + ASSERT_EQ(2u, sideOrds.size()); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[1])); } TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_FullExteriorSkin) @@ -1607,7 +1812,6 @@ TEST(Skinning, createSidesForShellQuad4Block) // 1*----*----*7 // 4 // - stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); const std::string meshDesc = "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ @@ -1620,8 +1824,42 @@ TEST(Skinning, createSidesForShellQuad4Block) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + auto skinPart = bulk->mesh_meta_data().get_part("surface_1"); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, *skinPart)); + EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, *skinPart)); +} + +TEST(Skinning, createSidesForShellQuad4BlockExposedBoundary) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +//shell-quad-4 mesh: +// 6 +// 3*----*----*9 +// | E2 | E4 | +// | | | +// 2*---5*----*8 +// | E1 | E3 | +// | | | +// 1*----*----*7 +// 4 +// + stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1|sideset:name=surface_1"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + stk::mesh::create_exposed_block_boundary_sides(*bulk, bulk->mesh_meta_data().universal_part(), stk::mesh::PartVector{&skinPart}); EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, skinPart)); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, skinPart)); } TEST(Skinning, createSidesForShellQuad8Block) @@ -1768,3 +2006,132 @@ TEST(CreateAndConvert, read_write_shell_4_all_face_sides) unlink(fileName.c_str()); } + +class CreateReadAndWrite : public stk::unit_test_util::MeshFixture +{ + protected: + std::string get_meshspec_single_shell_quad4_with_all_sides() { + //shell-quad-4 mesh: + // + // 4*---3* + // | E1 | + // | | + // 1*---2* + // + // + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,2,3,4, block_1\n\ + |sideset:name=surface_1; data=1,1, 1,2, 1,3, 1,4, 1,5, 1,6; split=topology"; + + std::vector coords = {0,0,0, 1,0,0, 1,1,0, 0,1,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + std::string get_meshspec_four_shell_quad4_with_sideset() { + //shell-quad-4 mesh: + // 6 + // 3*----*----*9 + // | E2 | E4 | + // | | | + // 2*---5*----*8 + // | E1 | E3 | + // | | | + // 1*----*----*7 + // 4 + // + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1\ + |sideset:name=surface_1; data=1,3, 3,3, 3,4, 4,4, 4,5, 2,5, 2,6, 1,6; split=topology"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + void create_1_shell_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_single_shell_quad4_with_all_sides(), bulk); + } + + void create_4_shells_using_stk_text_mesh(stk::mesh::BulkData& bulk) { + stk::unit_test_util::setup_text_mesh(bulk, get_meshspec_four_shell_quad4_with_sideset()); + } + + void create_4_shells_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_four_shell_quad4_with_sideset(), bulk); + } + + void check_mesh_properties(stk::mesh::BulkData& bulk, std::vector val) { + stk::mesh::EntityVector entities; + stk::mesh::get_entities(bulk, stk::topology::ELEM_RANK, entities); + + for (auto entity : entities) { + EXPECT_EQ(val[0], bulk.num_nodes(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[1], bulk.num_edges(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[2], bulk.num_faces(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[3], bulk.num_sides(entity)) << bulk.entity_key(entity); + } + + EXPECT_EQ(val[4], stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::EDGE_RANK))); + EXPECT_EQ(val[5], stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::FACE_RANK))); + } +}; + +TEST_F(CreateReadAndWrite, DISABLED_stk_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_stk_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_FaceAndEdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_face_and_edge_sides_test.g"); + create_1_shell_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 4, 2, 6, 4, 2}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 4, 2, 6, 4, 2}); + + unlink(fileName.c_str()); +} diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp index cb07bb52eb81..df30626384ae 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp @@ -43,8 +43,11 @@ #include #include -template using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; -template using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; +template +using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; + +template +using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; void extract_warning(std::string & stdoutString, int numExpectedOccurrences, const std::string & warningString); @@ -107,6 +110,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture stk::mesh::Selector & fieldParts, unsigned numStates = 1) { + get_meta().enable_field_sync_debugger(); const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); stk::mesh::put_field_on_mesh(field, fieldParts, &init); @@ -119,6 +123,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture unsigned numComponents, stk::mesh::Selector & fieldParts) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const std::vector init(numComponents, 1); stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -200,13 +205,13 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture template void initialize_ngp_field(stk::mesh::Field & stkField) { - stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template void initialize_ngp_field(stk::mesh::FieldBase & stkField) { - stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template @@ -393,7 +398,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -412,7 +417,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void write_vector_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -434,7 +439,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void device_field_set_all(stk::mesh::Field & stkField, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.set_all(ngpMesh, value); } @@ -490,7 +495,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), selector); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -568,7 +573,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_entity_field_data(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -592,7 +597,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_mesh_index(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp index e96770e3cbe3..712c0b47d652 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -96,18 +99,82 @@ class NgpMeshTest : public stk::mesh::fixtures::TestHexFixture numNodesVec.copy_device_to_host(); ASSERT_EQ(8u, numNodesVec[0]); } + + void run_edge_check(unsigned numExpectedEdgesPerElem) + { + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); + stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, get_meta().universal_part(), + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entityIndex) { + stk::mesh::ConnectedEntities edges = ngpMesh.get_edges(stk::topology::ELEM_RANK, entityIndex); + NGP_EXPECT_EQ(numExpectedEdgesPerElem, edges.size()); + } + ); + } + + void delete_edge_on_each_element() + { + get_bulk().modification_begin(); + + stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); + stk::mesh::ConnectedEntities edges = get_bulk().get_connected_entities(elem1, stk::topology::EDGE_RANK); + stk::mesh::ConnectedEntities edgeElems = get_bulk().get_connected_entities(edges[0], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem1, edgeElems[0]); + + const stk::mesh::ConnectivityOrdinal* edgeElemOrds = get_bulk().begin_ordinals(edges[0], stk::topology::ELEM_RANK); + stk::mesh::Entity edge = edges[0]; + EXPECT_TRUE(get_bulk().destroy_relation(elem1, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); + edges = get_bulk().get_connected_entities(elem2, stk::topology::EDGE_RANK); + EXPECT_EQ(12u, edges.size()); + edgeElems = get_bulk().get_connected_entities(edges[5], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem2, edgeElems[0]); + edgeElemOrds = get_bulk().begin_ordinals(edges[5], stk::topology::ELEM_RANK); + edge = edges[5]; + EXPECT_TRUE(get_bulk().destroy_relation(elem2, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + get_bulk().modification_end(); + } }; -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) +NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) { run_get_nodes_using_FastMeshIndex_test(); } -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) +NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) { run_get_nodes_using_FastMeshIndex_test(); } +NGP_TEST_F(NgpMeshTest, hexes_with_edges_update_connectivity) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + setup_mesh(1,1,2); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + stk::mesh::Part& edgePart = get_meta().declare_part("edges", stk::topology::EDGE_RANK); + + stk::mesh::create_edges(get_bulk(), get_meta().universal_part(), &edgePart); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + EXPECT_EQ(20u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + unsigned numExpectedEdgesPerElement = 12; + run_edge_check(numExpectedEdgesPerElement); + + delete_edge_on_each_element(); + EXPECT_EQ(18u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + numExpectedEdgesPerElement = 11; + run_edge_check(numExpectedEdgesPerElement); +} + class NgpMeshRankLimit : public stk::mesh::fixtures::TestHexFixture {}; TEST_F(NgpMeshRankLimit, tooManyRanksThrowWithMessage) @@ -247,6 +314,60 @@ NGP_TEST_F(NgpMeshTest, volatileFastSharedCommMap_custom_NgpMemSpace) } } +void test_ngp_permutations_1side_2perms(const stk::mesh::BulkData& mesh, + const stk::mesh::Part& sidePart) +{ + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(mesh); + + stk::mesh::EntityRank sideRank = mesh.mesh_meta_data().side_rank(); + stk::mesh::EntityVector sides; + stk::mesh::get_entities(mesh, sideRank, sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh.num_connectivity(sides[0], stk::topology::ELEM_RANK)); + const stk::mesh::Permutation* hostPerms = mesh.begin_permutations(sides[0], stk::topology::ELEM_RANK); + stk::mesh::Permutation expectedPerm1 = hostPerms[0]; + stk::mesh::Permutation expectedPerm2 = hostPerms[1]; + + stk::mesh::for_each_entity_run(ngpMesh, sideRank, sidePart, + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& sideIndex) { + stk::mesh::NgpMesh::Permutations perms = ngpMesh.get_permutations(sideRank, sideIndex, stk::topology::ELEM_RANK); + NGP_EXPECT_EQ(2u, perms.size()); + NGP_EXPECT_EQ(expectedPerm1, perms[0]); + NGP_EXPECT_EQ(expectedPerm2, perms[1]); + }); +} + +NGP_TEST(TestNgpMesh, permutations) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + std::string meshDesc = + "0,1,TRI_3_2D,1,2,3,block_1\n" + "0,2,TRI_3_2D,2,4,3,block_2\n" + "|dimension:2|sideset:name=surface_1; data=1,2"; + + std::shared_ptr mesh = stk::mesh::MeshBuilder(MPI_COMM_WORLD) + .set_spatial_dimension(2).create(); + stk::unit_test_util::setup_text_mesh(*mesh, meshDesc); + + stk::mesh::EntityRank sideRank = mesh->mesh_meta_data().side_rank(); + stk::mesh::Part* sidePart = mesh->mesh_meta_data().get_part("surface_1"); + STK_ThrowAssertMsg(sidePart != nullptr, "failed to find part for surface_1"); + + stk::mesh::EntityVector sides; + stk::mesh::get_entities(*mesh, sideRank, *sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh->num_connectivity(sides[0], stk::topology::ELEM_RANK)); + + stk::mesh::Permutation expectedPerm1 = static_cast(0); + stk::mesh::Permutation expectedPerm2 = static_cast(1); + const stk::mesh::Permutation* permutations = mesh->begin_permutations(sides[0], stk::topology::ELEM_RANK); + EXPECT_EQ(expectedPerm1, permutations[0]); + EXPECT_EQ(expectedPerm2, permutations[1]); + + test_ngp_permutations_1side_2perms(*mesh, *sidePart); +} + namespace { double reduce_on_host(stk::mesh::BulkData& bulk) { @@ -279,6 +400,27 @@ TEST(NgpHostMesh, FieldForEachEntityReduceOnHost_fromTylerVoskuilen) EXPECT_EQ(1.0, maxZ); } +TEST(NgpDeviceMesh, dont_let_stacksize_get_out_of_control) +{ + constexpr size_t tol = 50; + +#ifdef SIERRA_MIGRATION + constexpr size_t expectedBulkDataSize = 1320; +#else + constexpr size_t expectedBulkDataSize = 1256; +#endif + EXPECT_NEAR(expectedBulkDataSize, sizeof(stk::mesh::BulkData), tol); + + constexpr size_t expectedBucketSize = 1120; + EXPECT_NEAR(expectedBucketSize, sizeof(stk::mesh::Bucket), tol); + + constexpr size_t expectedDeviceMeshSize = 472; + EXPECT_NEAR(expectedDeviceMeshSize, sizeof(stk::mesh::DeviceMesh), tol); + + constexpr size_t expectedDeviceBucketSize = 264; + EXPECT_NEAR(expectedDeviceBucketSize, sizeof(stk::mesh::DeviceBucket), tol); +} + void add_elements(std::unique_ptr& bulk) { stk::mesh::MetaData& meta = bulk->mesh_meta_data(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp index 4c869fb317de..89ef24dfc2ca 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp @@ -16,7 +16,7 @@ namespace { -using IntDualViewType = Kokkos::DualView; +using UnsignedDualViewType = Kokkos::DualView; void test_view_of_fields(const stk::mesh::BulkData& bulk, stk::mesh::Field& field1, @@ -39,19 +39,19 @@ void test_view_of_fields(const stk::mesh::BulkData& bulk, Kokkos::deep_copy(fields, hostFields); unsigned numResults = 2; - IntDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); + UnsignedDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 2), KOKKOS_LAMBDA(const unsigned& i) { - result.d_view(i) = fields(i).get_ordinal() == i ? 1 : 0; + result.d_view(i) = fields(i).get_ordinal(); }); - result.modify(); - result.sync(); + result.modify(); + result.sync(); - EXPECT_EQ(1, result.h_view(0)); - EXPECT_EQ(1, result.h_view(1)); + EXPECT_EQ(hostFields(0).get_ordinal(), result.h_view(0)); + EXPECT_EQ(hostFields(1).get_ordinal(), result.h_view(1)); #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) for (unsigned i = 0; i < 2; ++i) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp index 78a62cadccb6..2ac1ae71994c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp @@ -172,8 +172,8 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture } } - template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> - void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) + template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> + void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) { const int component = 0; stk::mesh::HostMesh hostMesh(get_bulk()); @@ -205,7 +205,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -226,7 +226,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, meta.locally_owned_part(), KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entity) { @@ -269,7 +269,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -334,6 +334,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -344,6 +345,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_multistate_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 2; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -354,6 +356,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_vector_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; unsigned numScalarsPerEntity = 3; const T init[] = {1, 2, 3}; @@ -375,6 +378,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -395,6 +399,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -416,6 +421,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_vector_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -2237,7 +2243,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingEnti { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2251,7 +2257,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingBuck { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2267,6 +2273,7 @@ class NgpDebugFieldSync_SeparateFieldRestrictions : public NgpDebugFieldSyncFixt void setup_mesh_and_field_with_multiple_restrictions(const std::string& fieldName) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Part& part1 = get_meta().declare_part_with_topology("Part1", stk::topology::HEX_8); stk::mesh::Part& part2 = get_meta().declare_part_with_topology("Part2", stk::topology::HEX_8); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp index 815c3f86c40b..e8cf1e4bb8d8 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp @@ -102,7 +102,7 @@ class NgpDebugFieldSync_AccessDuringMeshModification : public NgpDebugFieldSyncF { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1372,7 +1372,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1392,7 +1392,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1413,7 +1413,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -1433,7 +1433,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_M declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1459,7 +1459,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1486,7 +1486,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2111,7 +2111,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Change declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2133,7 +2133,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Create declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -2155,7 +2155,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Delete declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2446,7 +2446,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2469,7 +2469,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -2493,7 +2493,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp index a4064c4fa6fc..d7784e7b2e12 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp @@ -389,7 +389,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ChangeBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -411,7 +411,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -434,7 +434,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -455,7 +455,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -482,7 +482,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -510,7 +510,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -536,7 +536,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -563,7 +563,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -591,7 +591,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1138,7 +1138,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_ChangeBucket_Chang declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1162,7 +1162,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_CreateBucket_Creat declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -1186,7 +1186,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_DeleteBucket_Delet declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1788,7 +1788,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_ChangeBucket_ChangeBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1813,7 +1813,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_CreateBucket_CreateBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -1839,7 +1839,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_DeleteBucket_DeleteBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp index 8c7e854e1c9e..455fe72e0f43 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp @@ -45,7 +45,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::Selector fieldSelector(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, fieldSelector, @@ -64,7 +64,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1324,6 +1324,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, EmptyField_MeshModification_Properly if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); create_parts({"Part1", "Part2", "Part3"}); + get_meta().enable_field_sync_debugger(); get_meta().declare_field(stk::topology::ELEM_RANK, "doubleScalarField", 1); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); @@ -1560,7 +1561,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1582,7 +1583,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); @@ -1603,7 +1604,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1628,7 +1629,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 2}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); batch_modify_element_part_membership({{3, "Part3", "Part2"}}); @@ -1654,7 +1655,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp index 7e70384d89f9..a9edbc02b662 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp @@ -441,4 +441,33 @@ TEST_F(NgpBatchChangeEntityParts, failedHostAccessAfterDeviceMeshMod) } } +TEST_F(NgpBatchChangeEntityParts, impl_addPartToNode_ngpDevice) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) GTEST_SKIP(); + + build_empty_mesh(1, 1); + + stk::mesh::Part & part1 = m_meta->declare_part_with_topology("part1", stk::topology::NODE); + stk::mesh::Part & part2 = m_meta->declare_part_with_topology("part2", stk::topology::NODE); + const unsigned nodeId = 1; + const stk::mesh::Entity node1 = create_node(*m_bulk, nodeId, {&part1}); + check_bucket_layout(*m_bulk, {{{"part1"}, {nodeId}}}, stk::topology::NODE_RANK); + + DeviceEntitiesType entities("deviceEntities", 1); + DevicePartOrdinalsType addPartOrdinals("deviceAddParts", 1); + DevicePartOrdinalsType removePartOrdinals("deviceRemoveParts", 0); + + stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(*m_bulk); + fill_device_views_add_remove_part_from_node(entities, addPartOrdinals, removePartOrdinals, ngpMesh, + node1, &part2, nullptr); + + ngpMesh.impl_batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); +// confirm_host_mesh_is_not_synchronized_from_device(ngpMesh); +// +// ngpMesh.sync_to_host(); +// confirm_host_mesh_is_synchronized_from_device(ngpMesh); +// +// check_bucket_layout(*m_bulk, {{{"part1", "part2"}, {nodeId}}}, stk::topology::NODE_RANK); +} + } // namespace diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp index 6031e1b1ac84..d0b61df0ec13 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp @@ -108,7 +108,7 @@ class TestTranspose : public ::testing::Test void setup_views(unsigned numBuckets, double overallocationFactor) { - deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); + deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); goldHostFieldData = stk::mesh::FieldDataHostViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "goldHostFieldData"), numBuckets, ORDER_INDICES(bucketCapacity,numPerEntity)); fill_gold_host_field_data(numBuckets); @@ -171,7 +171,7 @@ class TestTranspose : public ::testing::Test stk::mesh::FieldDataPointerHostViewType hostBucketPtrData; stk::mesh::FieldDataPointerDeviceViewType deviceBucketPtrData; - stk::mesh::FieldDataDeviceViewType deviceFieldData; + stk::mesh::FieldDataDeviceViewType deviceFieldData; stk::mesh::FieldDataHostViewType goldHostFieldData; stk::mesh::UnsignedViewType deviceBucketSizes; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp index 8cd3fb2dafad..40fa498d66fb 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp @@ -1768,6 +1768,7 @@ TEST_F(NgpFieldFixture, LateFieldUsage) get_meta().enable_late_fields(); stk::mesh::Field & stkLateIntField = create_field(stk::topology::ELEM_RANK, "lateIntField"); + initialize_ngp_field(stkIntField); // Must update early fields after adding late field initialize_ngp_field(stkLateIntField); int multiplier = 2; @@ -2039,17 +2040,6 @@ TEST(DeviceField, checkSizeof) EXPECT_TRUE(sizeof(stk::mesh::DeviceField) <= expectedNumBytes); } -TEST(DeviceBucket, checkSizeof) -{ -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after 2024/06/26 - size_t expectedNumBytes = 176; -#else - size_t expectedNumBytes = 152; // Value after removing DeviceBucket::m_hostEntities -#endif - std::cout << "sizeof(stk::mesh::DeviceBucket): " << sizeof(stk::mesh::DeviceBucket) << std::endl; - EXPECT_TRUE(sizeof(stk::mesh::DeviceBucket) <= expectedNumBytes); -} - enum PartIds : int { part_1 = 1, @@ -2632,4 +2622,76 @@ TEST_F(NgpFieldUpdate, MoveBackwardForwardBackward) check_field_values(); } +class NgpFieldExecSpaceTestFixture : public stk::unit_test_util::MeshFixture +{ +public: + void setup_empty_mesh_and_field() + { + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + + const std::vector init(1, 1); + stk::mesh::Field& field = get_meta().declare_field(stk::topology::ELEM_RANK, "", 1); + stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), 1, init.data()); + } + + auto get_default_field() + { + return get_meta().get_field(stk::topology::ELEM_RANK, ""); + } +}; + +TEST_F(NgpFieldExecSpaceTestFixture, CheckValidMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + +TEST_F(NgpFieldExecSpaceTestFixture, CheckSameMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + auto& ngpField1 = stk::mesh::get_updated_ngp_field(*field); + auto& ngpField2 = stk::mesh::get_updated_ngp_field(*field); + + EXPECT_TRUE((std::is_same_v::MemSpace, stk::mesh::NgpMeshDefaultMemSpace>)); + EXPECT_TRUE((std::is_same_v::MemSpace, std::remove_reference_t::MemSpace>)); +} + +TEST_F(NgpFieldExecSpaceTestFixture, UseNonDefaultMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + } // namespace ngp_field_test diff --git a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp index ec39418446e8..0089d3514845 100644 --- a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp +++ b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp @@ -74,7 +74,8 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi { const stk::mesh::FieldBase& coordField = *(bulkDataPtr->mesh_meta_data_ptr()->coordinate_field()); - stk::mesh::Selector selector(field); + auto meshMetaDataPtr = bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(field & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); for (stk::mesh::Bucket* bucket : buckets) @@ -100,9 +101,9 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi TEST(StkFieldCopier, MiddleMeshToStk) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; - stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); + stk_interface::StkMeshCreator creator1(meshFileName1, "RCB", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); mesh::FieldPtr meshField = mesh::create_field(meshPart.mesh, mesh::FieldShape(2, 0, 0), 3); @@ -121,7 +122,7 @@ TEST(StkFieldCopier, MiddleMeshToStk) TEST(StkFieldCopier, StkToMiddleMesh) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); @@ -139,4 +140,4 @@ TEST(StkFieldCopier, StkToMiddleMesh) copier.copy(meshField, *stkField); check_field(creator1.get_bulk_data_ptr(), *stkField); -} \ No newline at end of file +} diff --git a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp index 0f975a2adcb6..811691769c87 100644 --- a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp +++ b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp @@ -70,6 +70,7 @@ struct SimpleStruct { }; struct BaseStruct { + virtual ~BaseStruct() = default; virtual void set_i(const int) = 0; KOKKOS_FUNCTION virtual void print() { diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp index 25ced2655de8..8ac8f984a259 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp @@ -401,4 +401,44 @@ TEST(CoarseSearchCorrectness, Ngp_Local_NotQuiteEdgeOverlappingBoxes_ARBORX) device_local_runTwoBoxTest(stk::search::ARBORX, distanceBetweenBoxCenters, boxSize, expectedNumOverlap); } +TEST(CoarseSearchCorrectness, UpdateInteriorNodeBVsAtomicsIssueReproducer) +{ + std::vector> boxes(256); + + double coord_min = -2.1; + double coord_max = 2.1; + + int x_points = 5; + int y_points = 5; + int z_points = 9; + + for (int i=0; i < z_points; i++) { + double z_coord_min = coord_min + 1*i; + double z_coord_max = coord_max + 1*i; + + for (int j=0; j < y_points; j++) { + double y_coord_min = coord_min + 1*j; + double y_coord_max = coord_max + 1*j; + + for (int k=0; k < x_points; k++) { + double x_coord_min = coord_min + 1*k; + double x_coord_max = coord_max + 1*k; + + int index = k + x_points*j + x_points*y_points*i; + boxes[index] = stk::search::Box(x_coord_min, y_coord_min, z_coord_min, + x_coord_max, y_coord_max, z_coord_max); + } + } + } + + using ExecSpace = Kokkos::DefaultExecutionSpace; + stk::search::CollisionList collisions("collision_list"); + stk::search::morton_lbvh_search::value_type, ExecSpace, stk::search::Box>(boxes, boxes, collisions); + collisions.sync_from_device(); + + int numExpectedCollisions = 38125; + EXPECT_EQ(collisions.get_num_collisions(), numExpectedCollisions); + +} + } diff --git a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp index 5266109ae525..04074a484175 100644 --- a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp +++ b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp @@ -130,7 +130,10 @@ inline void check_side_node_ordinals(stk::topology topology, const std::vector 0) ? sideTopo.num_nodes() : 1; std::vector side_node_ordinals(numSideNodes); topology.side_node_ordinals(side, side_node_ordinals.data()); - EXPECT_EQ(gold_side_node_ordinals[side], side_node_ordinals); + + for (unsigned i = 0; i < numSideNodes; ++i) { + EXPECT_EQ(gold_side_node_ordinals[side][i], side_node_ordinals[i]); + } } } diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp index 8ed599943345..10aa5aa01785 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp @@ -71,7 +71,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad4() { }; } -TEST(stk_topology, shell_shell_quad4) +TEST(stk_topology, shell_quad_4) { stk::topology t = stk::topology::SHELL_QUAD_4; @@ -104,10 +104,10 @@ TEST(stk_topology, shell_shell_quad4) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad4()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad4()); @@ -159,10 +159,10 @@ void check_shell_quad_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -210,7 +210,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad8() { }; } -TEST(stk_topology, shell_shell_quad8) +TEST(stk_topology, shell_quad_8) { stk::topology t = stk::topology::SHELL_QUAD_8; @@ -243,10 +243,10 @@ TEST(stk_topology, shell_shell_quad8) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_8); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad8()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad8()); @@ -298,10 +298,10 @@ void check_shell_quad_8_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_8); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -349,7 +349,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad9() { }; } -TEST(stk_topology, shell_shell_quad9) +TEST(stk_topology, shell_quad_9) { stk::topology t = stk::topology::SHELL_QUAD_9; @@ -382,10 +382,10 @@ TEST(stk_topology, shell_shell_quad9) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad9()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad9()); @@ -437,10 +437,10 @@ void check_shell_quad_9_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp index 5bf526b6ec65..ff1cad2ab650 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp @@ -76,6 +76,7 @@ TEST(stk_topology, shell_tri_3) EXPECT_TRUE(t.is_valid()); EXPECT_TRUE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); //FIXME this will become false EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -101,9 +102,9 @@ TEST(stk_topology, shell_tri_3) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri3()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri3()); @@ -155,9 +156,9 @@ void check_shell_tri_3_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -235,9 +236,9 @@ TEST(stk_topology, shell_tri_4) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri4()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri4()); @@ -289,9 +290,9 @@ void check_shell_tri_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -369,9 +370,9 @@ TEST(stk_topology, shell_tri_6) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri6()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri6()); @@ -427,9 +428,9 @@ void check_shell_tri_6_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp index eb357fe92c4a..dad3e3e13ae4 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp @@ -35,7 +35,7 @@ #include "Kokkos_Core.hpp" // for parallel_for, KOKKOS_LAMBDA #include "gtest/gtest.h" // for AssertionResult, Message, TestPartResult #include "stk_ngp_test/ngp_test.hpp" // for NGP_EXPECT_EQ, NGP_EXPECT_FALSE, NGP_EXPECT_... -#include "stk_topology/topology.hpp" // for topology, topology::QUAD_4, topology::QUAD_8 +#include "stk_topology/topology.hpp" #include "topology_test_utils.hpp" // for check_edge_node_ordinals, check_edge_node_or... #include // for size_t #include // for operator<<, basic_ostream, basic_ostream<>::... @@ -78,6 +78,7 @@ TEST(stk_topology, shell_tri_3_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -135,6 +136,7 @@ void check_shell_tri_3_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -221,6 +223,7 @@ TEST(stk_topology, shell_tri_4_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -278,6 +281,7 @@ void check_shell_tri_4_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -364,6 +368,7 @@ TEST(stk_topology, shell_tri_6_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -421,6 +426,7 @@ void check_shell_tri_6_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp new file mode 100644 index 000000000000..d28a0568e7d8 --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp @@ -0,0 +1,255 @@ +#include "gtest/gtest.h" +#include "stk_util/diag/ParallelTimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/parallel/Parallel.hpp" + +namespace { +stk::diag::impl::ParallelTimer create_timer(const std::string& name, double val) +{ + stk::diag::impl::ParallelTimer timer; + timer.m_name = name; + timer.m_cpuTime.m_value = val; + timer.m_cpuTime.m_sum = val; + timer.m_cpuTime.m_min = val; + timer.m_cpuTime.m_max = val; + + return timer; +} +} + +namespace stk::diag { + +class TimerTester +{ + public: + TimerTester(Timer& timer) : + m_timer(timer) + {} + + double getCPUTime() const + { + return m_timer.getMetric().m_accumulatedLap; + } + + void setCPUTime(double val) + { + m_timer.m_timerImpl->m_cpuTime.m_accumulatedLap = val; + } + + private: + Timer& m_timer; +}; +} + +TEST(ParallelTimer, MergeSingleLevelTimers) +{ + double val1 = 1.0, val2 = 2.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + + stk::diag::impl::merge_parallel_timer(t1, t2, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val2); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val2); +} + +TEST(ParallelTimer, MergeTwoLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer2", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val4); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeTwoLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + + stk::diag::impl::ParallelTimer t4Copy = *(++t1.m_subtimerList.begin()); + EXPECT_EQ(t4Copy.m_cpuTime.m_value, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_sum, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_min, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeThreeLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer1", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer2", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer3", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val5); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val5); + EXPECT_EQ(t2Merged.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Merged = t2Merged.m_subtimerList.front(); + EXPECT_EQ(t3Merged.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_sum, val3 + val6); + EXPECT_EQ(t3Merged.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_max, val6); + EXPECT_EQ(t3Merged.m_subtimerList.size(), 0U); +} + +TEST(ParallelTimer, MergeThreeLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer5", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer6", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + EXPECT_EQ(t2Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t5Copy = t1.m_subtimerList.back(); + EXPECT_EQ(t5Copy.m_cpuTime.m_value, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_sum, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_min, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_max, val5); + EXPECT_EQ(t5Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Copy = t2Copy.m_subtimerList.front(); + EXPECT_EQ(t3Copy.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_sum, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_max, val3); + EXPECT_EQ(t3Copy.m_subtimerList.size(), 0U); + + stk::diag::impl::ParallelTimer t6Copy = t5Copy.m_subtimerList.front(); + EXPECT_EQ(t6Copy.m_cpuTime.m_value, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_sum, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_min, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_max, val6); + EXPECT_EQ(t6Copy.m_subtimerList.size(), 0U); +} + + +TEST(ParallelTimer, CollectTimersChunkSize1) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 1; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 0.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize - 1); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (commSize - 1) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} + +TEST(ParallelTimer, CollectTimersChunkSize2) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank + 1; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 2; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 1.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (1 + commSize) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} \ No newline at end of file diff --git a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp index 64098c3ae44f..bdf5390c59e2 100644 --- a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp +++ b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp @@ -245,10 +245,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - recvLists[src].resize(this->get_size(src, myrank)); + rcvLists[src].resize(this->get_size(src, myrank)); } } @@ -256,10 +256,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester virtual int get_num_recvs() override { return commSize; } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -275,9 +275,9 @@ class DenseParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector< std::vector >& sendLists) + void test_send_ranks(std::vector< std::vector >& sndLists) { - test_ranks_inner(sendLists); + test_ranks_inner(sndLists); } @@ -348,22 +348,22 @@ class NeighborParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { int src1 = (myrank - 1 + commSize) % commSize; int src2 = (myrank - 2 + commSize) % commSize; - recvLists[src1].resize(this->get_size(src1, myrank)); - recvLists[src2].resize(this->get_size(src2, myrank)); + rcvLists[src1].resize(this->get_size(src1, myrank)); + rcvLists[src2].resize(this->get_size(src2, myrank)); } virtual int get_num_sends() override { return std::min(2, commSize); } virtual int get_num_recvs() override { return std::min(2, commSize); } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -382,10 +382,10 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector>& sendLists) + void test_send_ranks(std::vector>& sndLists) { - std::vector sendRanks = get_ranks(sendLists); + std::vector sendRanks = get_ranks(sndLists); int len = sendRanks.size(); int dest1 = (myrank + 1) % commSize; @@ -404,9 +404,9 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_recv_ranks(std::vector>& recvLists) + void test_recv_ranks(std::vector>& rcvLists) { - auto recvRanks = get_ranks(recvLists); + auto recvRanks = get_ranks(rcvLists); int len = recvRanks.size(); int dest1 = (myrank - 1 + commSize) % commSize; diff --git a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp index eb5d63d7f9f5..3fdf583b1926 100644 --- a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp +++ b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp @@ -32,8 +32,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -#include "gtest/gtest.h" #include "stk_util/util/StridedArray.hpp" +#include "Kokkos_Core.hpp" +#include "gtest/gtest.h" #include TEST( StridedArray, ptr_and_size) diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 4f4efaf1655d..6d7b47a1ec5a 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. -#define STK_VERSION 5210601 +#define STK_VERSION 5230200 namespace stk diff --git a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp index a3c4a8c17459..a5316a474ee6 100644 --- a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp +++ b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp @@ -52,15 +52,14 @@ struct CommandLineOption class CommandLineParser { public: - enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; - CommandLineParser() : CommandLineParser("Options") {} - explicit CommandLineParser(const std::string &usagePreamble) - : optionsSpec(usagePreamble), - parsedOptions(), - positionalIndex(0) - { - add_flag("help,h", "display this help message and exit"); - add_flag("version,v", "display version information and exit"); + virtual ~CommandLineParser() = default; + enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; + CommandLineParser() : CommandLineParser("Options") {} + explicit CommandLineParser(const std::string &usagePreamble) + : optionsSpec(usagePreamble), parsedOptions(), positionalIndex(0) + { + add_flag("help,h", "display this help message and exit"); + add_flag("version,v", "display version information and exit"); } void disallow_unrecognized() diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp new file mode 100644 index 000000000000..e8dca41f2527 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp @@ -0,0 +1,254 @@ +#include "ParallelTimerImpl.hpp" +#include "stk_util/util/Marshal.hpp" + +namespace stk::diag::impl { + +ParallelTimer::ParallelTimer() + : m_name(), + m_timerMask(0), + m_subtimerLapCount(0), + m_lapCount(), + m_cpuTime(), + m_wallTime(), + m_MPICount(), + m_MPIByteCount(), + m_heapAlloc(), + m_subtimerList() +{} + +ParallelTimer::ParallelTimer(const ParallelTimer ¶llel_timer) + : m_name(parallel_timer.m_name), + m_timerMask(parallel_timer.m_timerMask), + m_subtimerLapCount(parallel_timer.m_subtimerLapCount), + m_lapCount(parallel_timer.m_lapCount), + m_cpuTime(parallel_timer.m_cpuTime), + m_wallTime(parallel_timer.m_wallTime), + m_MPICount(parallel_timer.m_MPICount), + m_MPIByteCount(parallel_timer.m_MPIByteCount), + m_heapAlloc(parallel_timer.m_heapAlloc), + m_subtimerList(parallel_timer.m_subtimerList) +{} + +ParallelTimer &ParallelTimer::operator=(const ParallelTimer ¶llel_timer) { + m_name = parallel_timer.m_name; + m_timerMask = parallel_timer.m_timerMask; + m_subtimerLapCount = parallel_timer.m_subtimerLapCount; + m_lapCount = parallel_timer.m_lapCount; + m_cpuTime = parallel_timer.m_cpuTime; + m_wallTime = parallel_timer.m_wallTime; + m_MPICount = parallel_timer.m_MPICount; + m_heapAlloc = parallel_timer.m_heapAlloc; + m_subtimerList = parallel_timer.m_subtimerList; + + return *this; +} + + +Writer & +ParallelTimer::dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "ParallelTimer " << m_name << push << dendl; + dout << "m_name " << m_name << dendl; + dout << "m_timerMask " << hex << m_timerMask << dendl; + dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; + dout << "m_lapCount " << m_lapCount << dendl; + dout << "m_cpuTime " << m_cpuTime << dendl; + dout << "m_wallTime " << m_wallTime << dendl; + dout << "m_MPICount " << m_MPICount << dendl; + dout << "m_MPIByteCount " << m_MPIByteCount << dendl; + dout << "m_heapAlloc " << m_heapAlloc << dendl; + dout << "m_subtimerList " << m_subtimerList << dendl; + dout << pop; + } + return dout; +} + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint) +{ + p0.m_timerMask = p1.m_timerMask; + p0.m_subtimerLapCount += p1.m_subtimerLapCount; + p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); + p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); + p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint); + p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); + p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); + p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); + + + for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { + std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); + if (p0_it == p0.m_subtimerList.end()) { + p0.m_subtimerList.push_back((*p1_it)); + } + else + merge_parallel_timer(*p0_it, *p1_it, checkpoint); + } +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t) { + min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount + >> t.m_lapCount.m_value + >> t.m_lapCount.m_checkpoint + >> t.m_cpuTime.m_value + >> t.m_cpuTime.m_checkpoint + >> t.m_wallTime.m_value + >> t.m_wallTime.m_checkpoint + >> t.m_MPICount.m_value + >> t.m_MPICount.m_checkpoint + >> t.m_MPIByteCount.m_value + >> t.m_MPIByteCount.m_checkpoint + >> t.m_heapAlloc.m_value + >> t.m_heapAlloc.m_checkpoint; + + min >> t.m_subtimerList; + + return min; +} + + +#ifdef STK_HAS_MPI +size_t round_up_to_next_word(size_t value) +{ + const size_t SIZE_OF_WORD = 4; + size_t remainder = value % SIZE_OF_WORD; + if (remainder == 0) { + return value; + } + return value + SIZE_OF_WORD - remainder; +} +#endif + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather) +{ + Marshal mout; + mout << root_timer; + impl::ParallelTimer root_parallel_timer; + +#ifdef STK_HAS_MPI + const int parallel_root = 0 ; + const int parallel_size = parallel_machine_size(comm); + const int parallel_rank = parallel_machine_rank(comm); + + // Gather the send counts on root processor + std::string send_string(mout.str()); + int send_count = send_string.size(); + send_string.resize(round_up_to_next_word(send_count)); + int padded_send_count = send_string.size(); + + + //We need to gather the timer data in a number of 'cycles' where we + //only receive from a portion of the other processors each cycle. + //This is because buffer allocation-failures have been observed for + //runs on very large numbers of processors if the 'root' processor tries + //to allocate a buffer large enough to hold timing data from all other + //procesors. + //We will set an arbitrary limit for now, making sure that no more than + //a given number of processors' worth of timer data is gathered at a time. + int num_cycles = parallel_size/max_procs_per_gather; + if (parallel_size < max_procs_per_gather || num_cycles < 1) { + num_cycles = 1; + } + + std::vector recv_buffer; + + for(int ii=0; ii recv_count(parallel_size, 0); + std::vector padded_recv_count(parallel_size, 0); + + { + int result = MPI_Gather(&send_count_this_cycle, 1, MPI_INT, + recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + { + int result = MPI_Gather(&padded_send_count_this_cycle, 1, MPI_INT, + padded_recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + // Receive counts are only non-zero on the root processor: + std::vector recv_displ(parallel_size + 1, 0); + std::vector recv_end(parallel_size + 1, 0); + + for (int i = 0 ; i < parallel_size ; ++i) { + recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; + recv_end[i] = recv_displ[i] + recv_count[i] ; + } + + const int recv_size = recv_displ[parallel_size] ; + + recv_buffer.assign(recv_size, 0); + + { + int result = MPI_Gatherv(send_string.data(), padded_send_count_this_cycle, MPI_CHAR, + recv_buffer.data(), padded_recv_count.data(), recv_displ.data(), MPI_CHAR, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; + throw std::runtime_error(message.str()); + } + + std::vector parallel_timer_vector; + parallel_timer_vector.reserve(parallel_size); + + if (parallel_rank == parallel_root) { + for (int j = 0; j < parallel_size; ++j) { + int received_count = recv_displ[j+1] - recv_displ[j]; + if (received_count > 0) { + //grow parallel_timer_vector by 1: + parallel_timer_vector.resize(parallel_timer_vector.size()+1); + Marshal min(std::string(recv_buffer.data() + recv_displ[j], recv_buffer.data() + recv_end[j])); + //put this data into the last entry of parallel_timer_vector: + min >> parallel_timer_vector[parallel_timer_vector.size()-1]; + } + } + + if (parallel_rank==parallel_root && send_count_this_cycle>0) + { + root_parallel_timer = parallel_timer_vector[0]; + } + + for (size_t j = 0; j < parallel_timer_vector.size(); ++j) + { + merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); + } + } + } + } +#else + Marshal min(mout.str()); + min >> root_parallel_timer; + merge_parallel_timer(root_parallel_timer, root_parallel_timer, checkpoint); +#endif + + return root_parallel_timer; +} + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp new file mode 100644 index 000000000000..c18de9b4774b --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp @@ -0,0 +1,210 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_UTIL_DIAG_ParallelTimerImpl_hpp +#define STK_UTIL_DIAG_ParallelTimerImpl_hpp + +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" +#include "WriterExt.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "TimerMetricTraits.hpp" +#include +#include +#include + +namespace stk { struct Marshal; } + +namespace stk::diag { +namespace impl { + +struct ParallelTimer +{ + template + struct Metric + { + Metric() + : m_value(0), + m_sum(0.0), + m_min(std::numeric_limits::max()), + m_max(0.0) + {} + + typename MetricTraits::Type m_value; ///< Metric value + typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value + double m_sum; ///< Reduction sum + double m_min; ///< Reduction min + double m_max; ///< Reduction max + + void accumulate(const Metric &metric, bool checkpoint) { + double value = static_cast(metric.m_value); + if (checkpoint) + value -= static_cast(metric.m_checkpoint); + + m_sum += value; + m_min = std::min(m_min, value); + m_max = std::max(m_max, value); + } + + Writer &dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; + dout << "m_value " << m_value << dendl; + dout << "m_checkpoint " << m_value << dendl; + dout << "m_sum " << m_sum << dendl; + dout << "m_min " << m_min << dendl; + dout << "m_max " << m_max << dendl; + dout << pop; + } + return dout; + } + }; + + ParallelTimer(); + + ParallelTimer(const ParallelTimer ¶llel_timer); + + ParallelTimer &operator=(const ParallelTimer ¶llel_timer); + + template + const Metric &getMetric() const; + + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; + double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + + Metric m_lapCount; ///< Number of laps accumulated + Metric m_cpuTime; ///< CPU time + Metric m_wallTime; ///< Wall time + Metric m_MPICount; ///< MPI call count + Metric m_MPIByteCount; ///< MPI byte count + Metric m_heapAlloc; ///< MPI byte count + + std::list m_subtimerList; ///< Sub timers + + Writer &dump(Writer &dout) const; +}; + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_lapCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_wallTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPICount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_heapAlloc; +} + + +template +Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { + return t.dump(dout); +} + +inline Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { + return parallel_timer.dump(dout); +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t); + +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif +class finder +{ +public: + finder(const std::string &name) + : m_name(name) + {} + + bool operator()(const ParallelTimer ¶llel_timer) const { + return equal_case(parallel_timer.m_name, m_name); + } + +private: + std::string m_name; +}; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint); + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather = 64); + +} +} + +#endif diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp index 209a947bf996..48b2ec3a988a 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp +++ b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,19 +30,19 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include "stk_util/diag/PrintTimer.hpp" #include "stk_util/diag/PrintTable.hpp" // for operator<<, PrintTable, end_col, PrintT... #include "stk_util/diag/Timer.hpp" // for getEnabledTimerMetricsMask, Timer, Time... #include "stk_util/diag/WriterExt.hpp" // for operator<< +#include "stk_util/diag/ParallelTimerImpl.hpp" #include "stk_util/environment/WallTime.hpp" // for wall_time #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, MPI_Gather, para... #include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Marshal.hpp" // for operator>>, Marshal, operator<< #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push #include "stk_util/util/WriterManip.hpp" // for hex -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for size_t #include // for find_if, max, min #include // for unary_function @@ -54,17 +54,6 @@ #include // for string, char_traits, operator<< #include // for vector -namespace stk { namespace diag { namespace { struct ParallelTimer; } } } - -namespace stk { - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t); - -Marshal &operator<<(Marshal &mout, const diag::Timer &t); - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t); -} namespace stk { namespace diag { @@ -120,7 +109,7 @@ Percent::operator()( strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)"; else if (ratio >= 10.0) strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)"; - else + else strout << "(" << std::setw(5) << std::setprecision(3) << std::fixed << ratio << "%)"; } @@ -131,370 +120,6 @@ inline std::ostream &operator<<(std::ostream &os, const Percent &p) { return p(os); } -struct ParallelTimer -{ - template - struct Metric - { - Metric() - : m_value(0), - m_sum(0.0), - m_min(std::numeric_limits::max()), - m_max(0.0) - {} - - typename MetricTraits::Type m_value; ///< Metric value - typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value - double m_sum; ///< Reduction sum - double m_min; ///< Reduction min - double m_max; ///< Reduction max - - void accumulate(const Metric &metric, bool checkpoint) { - double value = static_cast(metric.m_value); - if (checkpoint) - value -= static_cast(metric.m_checkpoint); - - m_sum += value; - m_min = std::min(m_min, value); - m_max = std::max(m_max, value); - } - - Writer &dump(Writer &dout) const { - if (dout.shouldPrint()) { - dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; - dout << "m_value " << m_value << dendl; - dout << "m_checkpoint " << m_value << dendl; - dout << "m_sum " << m_sum << dendl; - dout << "m_min " << m_min << dendl; - dout << "m_max " << m_max << dendl; - dout << pop; - } - return dout; - } - }; - - ParallelTimer() - : m_name(), - m_timerMask(0), - m_subtimerLapCount(0), - m_lapCount(), - m_cpuTime(), - m_wallTime(), - m_MPICount(), - m_MPIByteCount(), - m_heapAlloc(), - m_subtimerList() - {} - - ParallelTimer(const ParallelTimer ¶llel_timer) - : m_name(parallel_timer.m_name), - m_timerMask(parallel_timer.m_timerMask), - m_subtimerLapCount(parallel_timer.m_subtimerLapCount), - m_lapCount(parallel_timer.m_lapCount), - m_cpuTime(parallel_timer.m_cpuTime), - m_wallTime(parallel_timer.m_wallTime), - m_MPICount(parallel_timer.m_MPICount), - m_MPIByteCount(parallel_timer.m_MPIByteCount), - m_heapAlloc(parallel_timer.m_heapAlloc), - m_subtimerList(parallel_timer.m_subtimerList) - {} - - ParallelTimer &operator=(const ParallelTimer ¶llel_timer) { - m_name = parallel_timer.m_name; - m_timerMask = parallel_timer.m_timerMask; - m_subtimerLapCount = parallel_timer.m_subtimerLapCount; - m_lapCount = parallel_timer.m_lapCount; - m_cpuTime = parallel_timer.m_cpuTime; - m_wallTime = parallel_timer.m_wallTime; - m_MPICount = parallel_timer.m_MPICount; - m_heapAlloc = parallel_timer.m_heapAlloc; - m_subtimerList = parallel_timer.m_subtimerList; - - return *this; - } - - template - const Metric &getMetric() const; - - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; - double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - - Metric m_lapCount; ///< Number of laps accumulated - Metric m_cpuTime; ///< CPU time - Metric m_wallTime; ///< Wall time - Metric m_MPICount; ///< MPI call count - Metric m_MPIByteCount; ///< MPI byte count - Metric m_heapAlloc; ///< MPI byte count - - std::list m_subtimerList; ///< Sub timers - - Writer &dump(Writer &dout) const; -}; - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_lapCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_cpuTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_wallTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPICount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_heapAlloc; -} - - -template -Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { - return t.dump(dout); -} - -Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { - return parallel_timer.dump(dout); -} - -Writer & -ParallelTimer::dump(Writer &dout) const { - if (dout.shouldPrint()) { - dout << "ParallelTimer " << m_name << push << dendl; - dout << "m_name " << m_name << dendl; - dout << "m_timerMask " << hex << m_timerMask << dendl; - dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; - dout << "m_lapCount " << m_lapCount << dendl; - dout << "m_cpuTime " << m_cpuTime << dendl; - dout << "m_wallTime " << m_wallTime << dendl; - dout << "m_MPICount " << m_MPICount << dendl; - dout << "m_MPIByteCount " << m_MPIByteCount << dendl; - dout << "m_heapAlloc " << m_heapAlloc << dendl; - dout << "m_subtimerList " << m_subtimerList << dendl; - dout << pop; - } - return dout; -} - -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif -class finder -{ -public: - finder(const std::string &name) - : m_name(name) - {} - - bool operator()(const ParallelTimer ¶llel_timer) const { - return equal_case(parallel_timer.m_name, m_name); - } - -private: - std::string m_name; -}; -#ifdef __INTEL_COMPILER -#pragma warning(pop) -#endif - - -void -merge_parallel_timer( - ParallelTimer & p0, - const ParallelTimer & p1, - bool checkpoint) -{ - p0.m_timerMask = p1.m_timerMask; - p0.m_subtimerLapCount += p1.m_subtimerLapCount; - p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); - p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); - p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint); - p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); - p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); - p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); - - - for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { - std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); - if (p0_it == p0.m_subtimerList.end()) { - p0.m_subtimerList.push_back((*p1_it)); - p0_it = --p0.m_subtimerList.end(); - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } - else - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } -} - -#ifdef STK_HAS_MPI -size_t round_up_to_next_word(size_t value) -{ - const size_t SIZE_OF_WORD = 4; - size_t remainder = value % SIZE_OF_WORD; - if (remainder == 0) { - return value; - } - return value + SIZE_OF_WORD - remainder; -} -#endif - -void -collect_timers( - Timer & root_timer, - ParallelTimer & parallel_timer, - bool checkpoint, - ParallelMachine comm) -{ - Marshal mout; - mout << root_timer; - -#ifdef STK_HAS_MPI - const int parallel_root = 0 ; - const int parallel_size = parallel_machine_size(comm); - const int parallel_rank = parallel_machine_rank(comm); - - // Gather the send counts on root processor - std::string send_string(mout.str()); - - ParallelTimer root_parallel_timer; - - //We need to gather the timer data in a number of 'cycles' where we - //only receive from a portion of the other processors each cycle. - //This is because buffer allocation-failures have been observed for - //runs on very large numbers of processors if the 'root' processor tries - //to allocate a buffer large enough to hold timing data from all other - //procesors. - //We will set an arbitrary limit for now, making sure that no more than - //64 processors' worth of timer data is gathered at a time. - const int max_procs_per_gather = 64; - int num_cycles = parallel_size/max_procs_per_gather; - if (parallel_size < max_procs_per_gather || num_cycles < 1) { - num_cycles = 1; - } - - std::vector buffer; - - for(int ii=0; ii recv_count(parallel_size, 0); - int * const recv_count_ptr = recv_count.data() ; - std::vector padded_recv_count(parallel_size, 0); - int * const padded_recv_count_ptr = padded_recv_count.data() ; - - //should this processor send on the current cycle ? If not, set send_count to 0. - if ((parallel_rank+ii)%num_cycles!=0) { - send_count = 0; - } - - { - int result = MPI_Gather(&send_count, 1, MPI_INT, - recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - { - int result = MPI_Gather(&padded_send_count, 1, MPI_INT, - padded_recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - // Receive counts are only non-zero on the root processor: - std::vector recv_displ(parallel_size + 1, 0); - std::vector recv_end(parallel_size + 1, 0); - - for (int i = 0 ; i < parallel_size ; ++i) { - recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; - recv_end[i] = recv_displ[i] + recv_count[i] ; - } - - const int recv_size = recv_displ[parallel_size] ; - - buffer.assign(recv_size, 0); - - { - const char * const send_ptr = send_string.data(); - char * const recv_ptr = recv_size ? buffer.data() : nullptr; - int * const recv_displ_ptr = recv_displ.data() ; - - int result = MPI_Gatherv(const_cast(send_ptr), padded_send_count, MPI_CHAR, - recv_ptr, padded_recv_count_ptr, recv_displ_ptr, MPI_CHAR, - parallel_root, comm); -// int result = MPI_Gather(const_cast(send_ptr), padded_send_count, MPI_CHAR, -// recv_ptr, padded_send_count, MPI_CHAR, -// parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; - throw std::runtime_error(message.str()); - } - - std::vector parallel_timer_vector; - parallel_timer_vector.reserve(parallel_size); - - if (parallel_rank == parallel_root) { - for (int j = 0; j < parallel_size; ++j) { - int received_count = recv_displ[j+1] - recv_displ[j]; - if (received_count > 0) { - //grow parallel_timer_vector by 1: - parallel_timer_vector.resize(parallel_timer_vector.size()+1); - Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_end[j])); - //put this data into the last entry of parallel_timer_vector: - min >> parallel_timer_vector[parallel_timer_vector.size()-1]; - } - } - - if (parallel_rank==parallel_root && send_count>0) root_parallel_timer = parallel_timer_vector[0]; - - for (size_t j = 0; j < parallel_timer_vector.size(); ++j) - merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); - } - } - } - parallel_timer = root_parallel_timer; -#endif -} - // PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const; PrintTable & @@ -546,8 +171,8 @@ printSubtable( PrintTable & printSubtable( PrintTable & table, - const ParallelTimer & root_timer, - const ParallelTimer & timer, + const impl::ParallelTimer & root_timer, + const impl::ParallelTimer & timer, MetricsMask metrics_mask, int depth, bool timer_checkpoint) @@ -593,14 +218,14 @@ printSubtable( << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits::format(timer.getMetric().m_max) << " " << std::setw(8) << Percent(timer.getMetric().m_max, root_timer.getMetric().m_sum) << end_col; } - else + else table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col; table << end_row; depth++; } - for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) + for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint); return table; @@ -661,9 +286,7 @@ printTable( root_timer.accumulateSubtimerLapCounts(); - ParallelTimer parallel_timer; - - stk::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine); + impl::ParallelTimer parallel_timer = stk::diag::impl::collect_timers(root_timer, timer_checkpoint, parallel_machine); int parallel_rank = parallel_machine_rank(parallel_machine); if (parallel_rank == 0) { @@ -671,7 +294,7 @@ printTable( table.setAutoEndCol(false); table << end_col << end_col; - + if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits::METRIC) table << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col @@ -722,7 +345,7 @@ printTable( printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint); } - + if (timer_checkpoint) root_timer.checkpoint(); } @@ -756,15 +379,15 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m { double startTimeToPrintTable = stk::wall_time(); stk::PrintTable print_table; - + int parallel_size = parallel_machine_size(parallel_machine); if (parallel_size == 1) printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint); else printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine); - + os << print_table; - + double durationToPrintTable = stk::wall_time() - startTimeToPrintTable; if (parallel_machine_rank(parallel_machine) == 0) printTimeToPrintTable(os, durationToPrintTable); @@ -773,43 +396,5 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m } // namespace diag -Marshal &operator<<(stk::Marshal &mout, const diag::Timer &t); - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t) { - mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); - - return mout; -} - -Marshal &operator<<(Marshal &mout, const diag::Timer &t) { - mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() - << t.getMetric() << t.getMetric() << t.getMetric() - << t.getMetric() << t.getMetric() << t.getMetric(); - - mout << t.getTimerList(); - - return mout; -} - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) { - min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount - >> t.m_lapCount.m_value - >> t.m_lapCount.m_checkpoint - >> t.m_cpuTime.m_value - >> t.m_cpuTime.m_checkpoint - >> t.m_wallTime.m_value - >> t.m_wallTime.m_checkpoint - >> t.m_MPICount.m_value - >> t.m_MPICount.m_checkpoint - >> t.m_MPIByteCount.m_value - >> t.m_MPIByteCount.m_checkpoint - >> t.m_heapAlloc.m_value - >> t.m_heapAlloc.m_checkpoint; - - min >> t.m_subtimerList; - - return min; -} } // namespace stk diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp index fe381e7b2900..8743068c96c2 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp +++ b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_PrintTimer_hpp #define STK_UTIL_DIAG_PrintTimer_hpp diff --git a/packages/stk/stk_util/stk_util/diag/Timer.cpp b/packages/stk/stk_util/stk_util/diag/Timer.cpp index ef85027841ba..2f43b4e378b7 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.cpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,13 +30,13 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" #include "stk_util/diag/WriterExt.hpp" // for operator<< #include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for find_if #include // for exception #include // for unary_function @@ -47,22 +47,8 @@ namespace stk { namespace diag { -namespace { - MetricsMask s_enabledMetricsMask = METRICS_LAP_COUNT | METRICS_CPU_TIME | METRICS_WALL_TIME; ///< Bit mask of enabled metrics -template -typename MetricTraits::Type -value_now() { - if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) - return MetricTraits::value_now(); - else - return 0; -} - -} // namespace - - MetricsMask getEnabledTimerMetricsMask() { return s_enabledMetricsMask; @@ -75,284 +61,6 @@ setEnabledTimerMetricsMask( s_enabledMetricsMask = timer_mask | METRICS_LAP_COUNT; } - -/** - * Class TimerImpl is the core timer class. The Timer class is a - * wrapper around TimerImpl so that the buried references can be constructed more easily. - * - * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is - * started, the cpu start time, wall start time and other metrics, set to the process' current - * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other - * values are accumulated with the difference between now and the start time. - * - * Each timer may have a list of subordinate timers. The relationship is purely - * hierarchical in that a there is no timing relationship assumed between the timers other - * than the grouping. There is no relation between the starting and stopping of parent - * and subordinate timers. - * - * The subordinate timers are stored as pointers to a new timer on the heap, since the - * calling function will be receiving a reference to this memory which can never change - * location. The subordinate timers are not sorted in the list as they should very - * rarely be created or looked up by name, rather the calling function stores the - * reference via the Timer class. - * - */ -class TimerImpl -{ - friend class Timer; - -public: - static void updateRootTimer(TimerImpl *root_timer); - - static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); - - static void deleteRootTimer(TimerImpl *root_timer); - - static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); - -private: - /** - * Static function reg returns a reference to an existing timer or newly - * created timer of the specified name which is subordinate to the - * parent timer. - * - * @return a TimerImpl reference to the timer with the - * specified name that is subordinate to the - * parent timer. - */ - static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { - return parent_timer->addSubtimer(name, timer_mask, timer_set); - } - - /** - * Creates a new Timer instance. - * - * @param name a std::string const reference to the name of - * the timer. - * - */ - TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); - - /** - * Destroys a TimerImpl instance. - * - */ - ~TimerImpl(); - - TimerImpl(const TimerImpl &TimerImpl); - TimerImpl &operator=(const TimerImpl &TimerImpl); - - /** - * Class finder is a binary predicate for finding a subordinate timer. - * - * Note that the subordinate timer is an unsorted list as there are very few timers - * created and should rarely be looked up by name. - */ -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif - class finder - { - public: - explicit finder(const std::string &name) - : m_name(name) - {} - - bool operator()(Timer timer) const { - return equal_case(timer.getName(), m_name); - } - - private: - std::string m_name; - }; -#ifdef __INTEL_COMPILER -#pragma warning(pop) -#endif - -public: - /** - * Member function getName returns the name of the timer. - * - * @return a std::string const reference to the timer's - * name. - */ - const std::string &getName() const { - return m_name; - } - - /** - * Member function getTimerMask returns the timer mask of the timer. - * - * @return a TimerMask value to the timer mask. - */ - TimerMask getTimerMask() const { - return m_timerMask; - } - - /** - * Member function getTimerSet returns the timer set of the timer. - * - * @return a TimerSet const reference to the timer set. - */ - const TimerSet &getTimerSet() const { - return m_timerSet; - } - - /** - * Member function shouldRecord returns true if any of the specified timer - * bit masks are set in the enable timer bit mask. - */ - bool shouldRecord() const { - return m_timerSet.shouldRecord(m_timerMask) && s_enabledMetricsMask; - } - - /** - * Member function getSubtimerLapCount returns the subtimer lap counter. - * - * @return a Counter value of the subtimer lap counter. - */ - double getSubtimerLapCount() const { - return m_subtimerLapCount; - } - - void setSubtimerLapCount(double value) { - m_subtimerLapCount = value; - } - - /** - * Member function getLapCount returns the lap counter metric. The lap - * count metric is the number of times the stop function has been executed. - * - * @return a CounterMetric const reference of the lap counter - * metric. - */ - template - const Timer::Metric &getMetric() const; - - /** - * Member function getTimerList returns the subtimers associated with - * this timer. - * - * @return a TimerList const reference to the sub - * time list. - */ - const TimerList &getTimerList() const { - return m_subtimerList; - } - - TimerList::iterator begin() { - return m_subtimerList.begin(); - } - - TimerList::const_iterator begin() const { - return m_subtimerList.begin(); - } - - TimerList::iterator end() { - return m_subtimerList.end(); - } - - TimerList::const_iterator end() const { - return m_subtimerList.end(); - } - - /** - * Member function reset resets the accumulated time and lap times. - * - */ - void reset(); - - /** - * Member function checkpoint checkpoints the timer and all subtimers. - * - */ - void checkpoint() const; - - /** - * Member function start sets the start timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &start(); - - /** - * Member function lap sets the stop timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &lap(); - - /** - * Member function stop sets the stop timer and sums the just completed lap - * time to the timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &stop(); - - /** - * Member function accumulateSubtimerLapCounts sums the lap counter of all - * subordinate timers. This is used to determin which timers have been activated at all. - * - * @return an int value of the number of subordinate - * timer laps. - */ - double accumulateSubtimerLapCounts() const; - - Timer getSubtimer(const std::string &name); - -public: - /** - * Member function dump writes the timer to the specified - * diagnostic writer. - * - * @param dout a Writer variable reference to write the timer to. - * - * @return a Writer reference to dout. - */ - Writer &dump(Writer &dout) const; - -private: - /** - * Member function addSubtimer returns a reference to an existing or new - * subtimer with the specified name. - * - * @param name a std::string value of the timer's name. - * - * @param timer_mask a TimerMask value of the class of the timer. - * - * @return a TimerImpl reference to the timer with - * specified name. - */ - TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); - TimerImpl & child_notifies_of_start(); - TimerImpl & child_notifies_of_stop(); - -private: - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; ///< Bit mask to enable timer - TimerImpl * m_parentTimer; ///< Parent timer - mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - unsigned m_lapStartCount; ///< Number of pending lap stops - unsigned m_activeChildCount; ///< How many children timers have been started - bool m_childCausedStart; ///< Was this timer started because a child was started? - - TimerList m_subtimerList; ///< List of subordinate timers - - const TimerSet & m_timerSet; ///< Timer enabled mask - Timer::Metric m_lapCount; ///< Number of laps accumulated - Timer::Metric m_cpuTime; ///< CPU time - Timer::Metric m_wallTime; ///< Wall time - Timer::Metric m_MPICount; ///< MPI call count - Timer::Metric m_MPIByteCount; ///< MPI byte count - Timer::Metric m_heapAlloc; ///< Heap allocated -}; - -inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { - return timer.dump(dout); -} - void updateRootTimer( Timer root_timer) @@ -379,321 +87,6 @@ deleteRootTimer( } -TimerImpl::TimerImpl( - const std::string & name, - TimerMask timer_mask, - TimerImpl * parent_timer, - const TimerSet & timer_set) - : m_name(name), - m_timerMask(timer_mask), - m_parentTimer(parent_timer), - m_subtimerLapCount(0.0), - m_lapStartCount(0), - m_activeChildCount(0), - m_childCausedStart(false), - m_subtimerList(), - m_timerSet(timer_set) -{} - - -TimerImpl::~TimerImpl() -{ - try { - for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - delete (*it).m_timerImpl; - } - catch (std::exception &) { - } -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_lapCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_cpuTime; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_wallTime; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPICount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_heapAlloc; -} - - -void -TimerImpl::reset() -{ - m_lapStartCount = 0; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_lapCount.reset(); - m_cpuTime.reset(); - m_wallTime.reset(); - m_MPICount.reset(); - m_MPIByteCount.reset(); - m_heapAlloc.reset(); -} - - -Timer -TimerImpl::getSubtimer( - const std::string & name) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) - throw std::runtime_error("Timer not found"); - else - return *it; -} - - -TimerImpl * -TimerImpl::addSubtimer( - const std::string & name, - TimerMask timer_mask, - const TimerSet & timer_set) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) { - TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); - m_subtimerList.push_back(Timer(timer_impl)); - return timer_impl; - } - else - return (*it).m_timerImpl; -} - - -TimerImpl & -TimerImpl::start() -{ - if (shouldRecord()) { - if (m_lapStartCount == 0) { - ++m_lapStartCount; - m_lapCount.m_lapStart = m_lapCount.m_lapStop; - - m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); - m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); - m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); - m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); - m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_start(); - } - } - - return *this; -} - - -TimerImpl & -TimerImpl::lap() -{ - if (shouldRecord()) { - if (m_lapStartCount > 0) { - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - } - } - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_start() -{ - //Start only if not already started and this isn't a root timer - if(m_lapStartCount == 0 && m_parentTimer) - { - start(); - m_childCausedStart = true; - } - m_activeChildCount++; - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_stop() -{ - m_activeChildCount--; - if(m_activeChildCount == 0 && m_childCausedStart) - { - stop(); - } - return *this; -} - -TimerImpl & -TimerImpl::stop() -{ - if (shouldRecord()) { - if (m_lapStartCount > 0) { - m_lapStartCount = 0; - m_lapCount.m_lapStop++; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - - m_lapCount.addLap(); - m_cpuTime.addLap(); - m_wallTime.addLap(); - m_MPICount.addLap(); - m_MPIByteCount.addLap(); - m_heapAlloc.addLap(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_stop(); - } - } - - return *this; -} - - -double -TimerImpl::accumulateSubtimerLapCounts() const -{ - m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->accumulateSubtimerLapCounts(); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; - - return m_subtimerLapCount; -} - - -void -TimerImpl::checkpoint() const -{ - m_lapCount.checkpoint(); - m_cpuTime.checkpoint(); - m_wallTime.checkpoint(); - m_MPICount.checkpoint(); - m_MPIByteCount.checkpoint(); - m_heapAlloc.checkpoint(); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->checkpoint(); -} - - -void -TimerImpl::updateRootTimer(TimerImpl *root_timer) -{ - root_timer->m_lapCount.m_lapStop = value_now(); - root_timer->m_cpuTime.m_lapStop = value_now(); - root_timer->m_wallTime.m_lapStop = value_now(); - root_timer->m_MPICount.m_lapStop = value_now(); - root_timer->m_MPIByteCount.m_lapStop = value_now(); - root_timer->m_heapAlloc.m_lapStop = value_now(); - - root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; - root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; - root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; - root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; - root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; - root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; -} - - - -Timer -TimerImpl::createRootTimer( - const std::string & name, - const TimerSet & timer_set) -{ - TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); - return Timer(timer_impl); -} - - -void -TimerImpl::deleteRootTimer( - TimerImpl * root_timer) -{ - delete root_timer; -} - - -void -TimerImpl::findTimer( - TimerImpl * timer, - std::vector & path_tail_vector, - std::vector & found_timers) -{ - if (timer->begin() == timer->end()) { // at leaf - } - else - for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) - findTimer((*it).m_timerImpl, path_tail_vector, found_timers); -} - - -Writer & -TimerImpl::dump( - Writer & dout) const -{ - if (dout.shouldPrint()) { - dout << "TimerImpl" << push << dendl; - dout << "m_name, " << m_name << dendl; - dout << "m_timerMask, " << m_timerMask << dendl; - dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; - dout << "m_lapStartCount, " << m_lapStartCount << dendl; - - dout << "m_lapCount, " << m_lapCount << dendl; - dout << "m_cpuTime, " << m_cpuTime << dendl; - dout << "m_wallTime, " << m_wallTime << dendl; - dout << "m_MPICount, " << m_MPICount << dendl; - dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; - dout << "m_heapAlloc, " << m_heapAlloc << dendl; - - dout << "m_subtimerList, " << m_subtimerList << dendl; - dout << pop; - } - - return dout; -} Timer::~Timer() {} @@ -765,25 +158,25 @@ Timer::begin() { return m_timerImpl->begin(); } - + TimerList::const_iterator Timer::begin() const { return m_timerImpl->begin(); } - + TimerList::iterator Timer::end() { return m_timerImpl->end(); } - + TimerList::const_iterator Timer::end() const { return m_timerImpl->end(); } - + double Timer::accumulateSubtimerLapCounts() const { return m_timerImpl->accumulateSubtimerLapCounts(); @@ -891,9 +284,9 @@ TimeBlockSynchronized::stop() namespace sierra { namespace Diag { -// +// // SierraRootTimer member functions: -// +// SierraRootTimer::SierraRootTimer() : m_sierraTimer(stk::diag::createRootTimer("Sierra", sierraTimerSet())) { } @@ -1057,14 +450,14 @@ TimerParser::parse( m_metricsSetMask = 0; m_metricsMask = 0; m_optionMask = getEnabledTimerMask(); - + m_optionMask = OptionMaskParser::parse(option_mask); setEnabledTimerMask(m_optionMask); - + if (m_metricsSetMask != 0) stk::diag::setEnabledTimerMetricsMask(m_metricsMask); - + return m_optionMask; } diff --git a/packages/stk/stk_util/stk_util/diag/Timer.hpp b/packages/stk/stk_util/stk_util/diag/Timer.hpp index 466c06d52e75..f4f9c391d4e0 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.hpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_Timer_hpp #define STK_UTIL_DIAG_Timer_hpp @@ -38,6 +38,7 @@ #include "stk_util/diag/Option.hpp" // for OptionMask, OptionMaskParser, OptionMaskP... #include "stk_util/diag/TimerMetricTraits.hpp" // for MetricTraits, MetricsMask, CPUTime (ptr o... #include "stk_util/environment/FormatTime.hpp" // for TimeFormat +#include "stk_util/util/Marshal.hpp" #include "stk_util/parallel/Parallel.hpp" // for ParallelMachine, ompi_communicator_t #include // for size_t #include // for list @@ -182,6 +183,7 @@ class Timer friend class TimerImpl; friend class TimeBlock; friend class TimeBlockSynchronized; + friend class TimerTester; friend void updateRootTimer(Timer); friend Timer createRootTimer(const std::string &, const TimerSet &); friend void deleteRootTimer(Timer); @@ -469,6 +471,53 @@ class Timer TimerImpl * m_timerImpl; ///< Reference to the actual timer }; +template +Marshal &operator<<(Marshal &mout, const Timer::Metric &t) { + mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); + + return mout; +} + +inline Marshal &operator<<(Marshal &mout, const Timer &t) { + mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() + << t.getMetric() << t.getMetric() << t.getMetric() + << t.getMetric() << t.getMetric() << t.getMetric(); + + mout << t.getTimerList(); + + return mout; +} + +/** + * @brief Function operator<< writes a timer to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +template +inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { + return timer.dump(dout); +} + +/** + * Function operator<< writes a timer metric to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +inline Writer &operator<<(Writer &dout, const Timer &timer) { + return timer.dump(dout); +} /** @@ -604,36 +653,6 @@ class TimeBlockSynchronized }; -/** - * @brief Function operator<< writes a timer to the diagnostic stream. - * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -template -inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { - return timer.dump(dout); -} - -/** - * Function operator<< writes a timer metric to the diagnostic stream. - * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -inline Writer &operator<<(Writer &dout, const Timer &timer) { - return timer.dump(dout); -} } // namespace diag } // namespace stk @@ -780,14 +799,14 @@ class TimerParser : public OptionMaskParser * @param arg a std::string const reference to the argument * values. */ - virtual void parseArg(const std::string &name, const std::string &arg) const; + virtual void parseArg(const std::string &name, const std::string &arg) const; mutable stk::diag::MetricsMask m_metricsSetMask; mutable stk::diag::MetricsMask m_metricsMask; }; -class SierraRootTimer +class SierraRootTimer { public: SierraRootTimer(); @@ -795,7 +814,7 @@ class SierraRootTimer stk::diag::Timer & sierraTimer(); private: - stk::diag::Timer m_sierraTimer; + stk::diag::Timer m_sierraTimer; }; } // namespace Diag diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp new file mode 100644 index 000000000000..39181c702ab5 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp @@ -0,0 +1,333 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" + +namespace stk::diag { + +namespace { + +template +typename MetricTraits::Type +value_now() { + if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) + return MetricTraits::value_now(); + else + return 0; +} + +} // namespace + + +TimerImpl::TimerImpl( + const std::string & name, + TimerMask timer_mask, + TimerImpl * parent_timer, + const TimerSet & timer_set) + : m_name(name), + m_timerMask(timer_mask), + m_parentTimer(parent_timer), + m_subtimerLapCount(0.0), + m_lapStartCount(0), + m_activeChildCount(0), + m_childCausedStart(false), + m_subtimerList(), + m_timerSet(timer_set) +{} + + +TimerImpl::~TimerImpl() +{ + try { + for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + delete (*it).m_timerImpl; + } + catch (std::exception &) { + } +} + +bool TimerImpl::shouldRecord() const { + return m_timerSet.shouldRecord(m_timerMask) && getEnabledTimerMetricsMask(); +} + +void +TimerImpl::reset() +{ + m_lapStartCount = 0; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_lapCount.reset(); + m_cpuTime.reset(); + m_wallTime.reset(); + m_MPICount.reset(); + m_MPIByteCount.reset(); + m_heapAlloc.reset(); +} + + +Timer +TimerImpl::getSubtimer( + const std::string & name) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) + throw std::runtime_error("Timer not found"); + else + return *it; +} + + +TimerImpl * +TimerImpl::addSubtimer( + const std::string & name, + TimerMask timer_mask, + const TimerSet & timer_set) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) { + TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); + m_subtimerList.push_back(Timer(timer_impl)); + return timer_impl; + } + else + return (*it).m_timerImpl; +} + + +TimerImpl & +TimerImpl::start() +{ + if (shouldRecord()) { + if (m_lapStartCount == 0) { + ++m_lapStartCount; + m_lapCount.m_lapStart = m_lapCount.m_lapStop; + + m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); + m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); + m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); + m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); + m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_start(); + } + } + + return *this; +} + + +TimerImpl & +TimerImpl::lap() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + } + } + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_start() +{ + //Start only if not already started and this isn't a root timer + if(m_lapStartCount == 0 && m_parentTimer) + { + start(); + m_childCausedStart = true; + } + m_activeChildCount++; + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_stop() +{ + m_activeChildCount--; + if(m_activeChildCount == 0 && m_childCausedStart) + { + stop(); + } + return *this; +} + +TimerImpl & +TimerImpl::stop() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_lapStartCount = 0; + m_lapCount.m_lapStop++; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + + m_lapCount.addLap(); + m_cpuTime.addLap(); + m_wallTime.addLap(); + m_MPICount.addLap(); + m_MPIByteCount.addLap(); + m_heapAlloc.addLap(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_stop(); + } + } + + return *this; +} + + +double +TimerImpl::accumulateSubtimerLapCounts() const +{ + m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->accumulateSubtimerLapCounts(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; + + return m_subtimerLapCount; +} + + +void +TimerImpl::checkpoint() const +{ + m_lapCount.checkpoint(); + m_cpuTime.checkpoint(); + m_wallTime.checkpoint(); + m_MPICount.checkpoint(); + m_MPIByteCount.checkpoint(); + m_heapAlloc.checkpoint(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->checkpoint(); +} + + +void +TimerImpl::updateRootTimer(TimerImpl *root_timer) +{ + root_timer->m_lapCount.m_lapStop = value_now(); + root_timer->m_cpuTime.m_lapStop = value_now(); + root_timer->m_wallTime.m_lapStop = value_now(); + root_timer->m_MPICount.m_lapStop = value_now(); + root_timer->m_MPIByteCount.m_lapStop = value_now(); + root_timer->m_heapAlloc.m_lapStop = value_now(); + + root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; + root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; + root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; + root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; + root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; + root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; +} + + + +Timer +TimerImpl::createRootTimer( + const std::string & name, + const TimerSet & timer_set) +{ + TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); + return Timer(timer_impl); +} + + +void +TimerImpl::deleteRootTimer( + TimerImpl * root_timer) +{ + delete root_timer; +} + + +void +TimerImpl::findTimer( + TimerImpl * timer, + std::vector & path_tail_vector, + std::vector & found_timers) +{ + if (timer->begin() == timer->end()) { // at leaf + } + else + for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) + findTimer((*it).m_timerImpl, path_tail_vector, found_timers); +} + + +Writer & +TimerImpl::dump( + Writer & dout) const +{ + if (dout.shouldPrint()) { + dout << "TimerImpl" << push << dendl; + dout << "m_name, " << m_name << dendl; + dout << "m_timerMask, " << m_timerMask << dendl; + dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; + dout << "m_lapStartCount, " << m_lapStartCount << dendl; + + dout << "m_lapCount, " << m_lapCount << dendl; + dout << "m_cpuTime, " << m_cpuTime << dendl; + dout << "m_wallTime, " << m_wallTime << dendl; + dout << "m_MPICount, " << m_MPICount << dendl; + dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; + dout << "m_heapAlloc, " << m_heapAlloc << dendl; + + dout << "m_subtimerList, " << m_subtimerList << dendl; + dout << pop; + } + + return dout; +} + + + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp new file mode 100644 index 000000000000..e17493e51d5f --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp @@ -0,0 +1,370 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_UTIL_DIAG_TimerImpl_hpp +#define STK_UTIL_DIAG_TimerImpl_hpp + +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push +#include "stk_util/diag/WriterExt.hpp" // for operator<< + + +namespace stk::diag { + + +/** + * Class TimerImpl is the core timer class. The Timer class is a + * wrapper around TimerImpl so that the buried references can be constructed more easily. + * + * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is + * started, the cpu start time, wall start time and other metrics, set to the process' current + * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other + * values are accumulated with the difference between now and the start time. + * + * Each timer may have a list of subordinate timers. The relationship is purely + * hierarchical in that a there is no timing relationship assumed between the timers other + * than the grouping. There is no relation between the starting and stopping of parent + * and subordinate timers. + * + * The subordinate timers are stored as pointers to a new timer on the heap, since the + * calling function will be receiving a reference to this memory which can never change + * location. The subordinate timers are not sorted in the list as they should very + * rarely be created or looked up by name, rather the calling function stores the + * reference via the Timer class. + * + */ +class TimerImpl +{ + friend class Timer; + friend class TimerTester; + +public: + static void updateRootTimer(TimerImpl *root_timer); + + static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); + + static void deleteRootTimer(TimerImpl *root_timer); + + static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); + +private: + /** + * Static function reg returns a reference to an existing timer or newly + * created timer of the specified name which is subordinate to the + * parent timer. + * + * @return a TimerImpl reference to the timer with the + * specified name that is subordinate to the + * parent timer. + */ + static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { + return parent_timer->addSubtimer(name, timer_mask, timer_set); + } + + /** + * Creates a new Timer instance. + * + * @param name a std::string const reference to the name of + * the timer. + * + */ + TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); + + /** + * Destroys a TimerImpl instance. + * + */ + ~TimerImpl(); + + TimerImpl(const TimerImpl &TimerImpl); + TimerImpl &operator=(const TimerImpl &TimerImpl); + + /** + * Class finder is a binary predicate for finding a subordinate timer. + * + * Note that the subordinate timer is an unsorted list as there are very few timers + * created and should rarely be looked up by name. + */ +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif + class finder + { + public: + explicit finder(const std::string &name) + : m_name(name) + {} + + bool operator()(Timer timer) const { + return equal_case(timer.getName(), m_name); + } + + private: + std::string m_name; + }; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +public: + /** + * Member function getName returns the name of the timer. + * + * @return a std::string const reference to the timer's + * name. + */ + const std::string &getName() const { + return m_name; + } + + /** + * Member function getTimerMask returns the timer mask of the timer. + * + * @return a TimerMask value to the timer mask. + */ + TimerMask getTimerMask() const { + return m_timerMask; + } + + /** + * Member function getTimerSet returns the timer set of the timer. + * + * @return a TimerSet const reference to the timer set. + */ + const TimerSet &getTimerSet() const { + return m_timerSet; + } + + /** + * Member function shouldRecord returns true if any of the specified timer + * bit masks are set in the enable timer bit mask. + */ + bool shouldRecord() const; + + /** + * Member function getSubtimerLapCount returns the subtimer lap counter. + * + * @return a Counter value of the subtimer lap counter. + */ + double getSubtimerLapCount() const { + return m_subtimerLapCount; + } + + void setSubtimerLapCount(double value) { + m_subtimerLapCount = value; + } + + /** + * Member function getLapCount returns the lap counter metric. The lap + * count metric is the number of times the stop function has been executed. + * + * @return a CounterMetric const reference of the lap counter + * metric. + */ + template + const Timer::Metric &getMetric() const; + + /** + * Member function getTimerList returns the subtimers associated with + * this timer. + * + * @return a TimerList const reference to the sub + * time list. + */ + const TimerList &getTimerList() const { + return m_subtimerList; + } + + TimerList::iterator begin() { + return m_subtimerList.begin(); + } + + TimerList::const_iterator begin() const { + return m_subtimerList.begin(); + } + + TimerList::iterator end() { + return m_subtimerList.end(); + } + + TimerList::const_iterator end() const { + return m_subtimerList.end(); + } + + /** + * Member function reset resets the accumulated time and lap times. + * + */ + void reset(); + + /** + * Member function checkpoint checkpoints the timer and all subtimers. + * + */ + void checkpoint() const; + + /** + * Member function start sets the start timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &start(); + + /** + * Member function lap sets the stop timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &lap(); + + /** + * Member function stop sets the stop timer and sums the just completed lap + * time to the timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &stop(); + + /** + * Member function accumulateSubtimerLapCounts sums the lap counter of all + * subordinate timers. This is used to determin which timers have been activated at all. + * + * @return an int value of the number of subordinate + * timer laps. + */ + double accumulateSubtimerLapCounts() const; + + Timer getSubtimer(const std::string &name); + +public: + /** + * Member function dump writes the timer to the specified + * diagnostic writer. + * + * @param dout a Writer variable reference to write the timer to. + * + * @return a Writer reference to dout. + */ + Writer &dump(Writer &dout) const; + +private: + /** + * Member function addSubtimer returns a reference to an existing or new + * subtimer with the specified name. + * + * @param name a std::string value of the timer's name. + * + * @param timer_mask a TimerMask value of the class of the timer. + * + * @return a TimerImpl reference to the timer with + * specified name. + */ + TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); + TimerImpl & child_notifies_of_start(); + TimerImpl & child_notifies_of_stop(); + +private: + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; ///< Bit mask to enable timer + TimerImpl * m_parentTimer; ///< Parent timer + mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + unsigned m_lapStartCount; ///< Number of pending lap stops + unsigned m_activeChildCount; ///< How many children timers have been started + bool m_childCausedStart; ///< Was this timer started because a child was started? + + TimerList m_subtimerList; ///< List of subordinate timers + + const TimerSet & m_timerSet; ///< Timer enabled mask + Timer::Metric m_lapCount; ///< Number of laps accumulated + Timer::Metric m_cpuTime; ///< CPU time + Timer::Metric m_wallTime; ///< Wall time + Timer::Metric m_MPICount; ///< MPI call count + Timer::Metric m_MPIByteCount; ///< MPI byte count + Timer::Metric m_heapAlloc; ///< Heap allocated +}; + +inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { + return timer.dump(dout); +} + + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_lapCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_wallTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPICount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_heapAlloc; +} + + +} + +#endif \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.cpp b/packages/stk/stk_util/stk_util/environment/EnvData.cpp index b09aff4f1650..6a27223777d0 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.cpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.cpp @@ -63,7 +63,6 @@ namespace stk { m_inputFileRequired(true), m_checkSubCycle(false), m_checkSmRegion(false), - m_isZapotec(false), m_worldComm(MPI_COMM_NULL), m_parallelComm(MPI_COMM_NULL), m_parallelSize(-1), diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.hpp b/packages/stk/stk_util/stk_util/environment/EnvData.hpp index 21ba461baa46..7afce19069a5 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.hpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.hpp @@ -108,7 +108,6 @@ struct EnvData bool m_inputFileRequired; bool m_checkSubCycle; bool m_checkSmRegion; - bool m_isZapotec; MPI_Comm m_worldComm; diff --git a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp index 94c49b1a56df..00d1837b7ea4 100644 --- a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp +++ b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp @@ -196,7 +196,7 @@ bool Scheduler::internal_is_it_time(Time time) // called multiple times with the same argument, it will return the // same response. - assert(time >= lastTime_); + STK_ThrowAssertMsg(time >= lastTime_, "time = " << time << ", lastTime_ = " << lastTime_); // If this is a restart, then calculate what the lastTime_ setting would // have been for this scheduler (based only on start time and deltas). diff --git a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp index c0dc9a2d8f34..12bc0522d186 100644 --- a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp +++ b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp @@ -72,6 +72,8 @@ using MemSpace = Kokkos::HIPSpace; using MemSpace = ExecSpace::memory_space; #endif +using HostMemSpace = HostExecSpace::memory_space; + #ifdef KOKKOS_ENABLE_HIP template using RangePolicy = Kokkos::RangePolicy>; diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index c2b9f9ded275..fe5e52134cdd 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. -#define STK_VERSION_STRING "5.23.1-605-g31b54b7f" +#define STK_VERSION_STRING "5.23.2-429-g07a311ce" #endif namespace stk { diff --git a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp index 3d65d0a6017a..e2f94a533d09 100644 --- a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp +++ b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp @@ -32,13 +32,20 @@ constexpr bool have_errexcept() #endif } +constexpr int FE_EXCEPT_CHECKS = FE_ALL_EXCEPT & ~FE_INEXACT; + std::string get_fe_except_string(int fe_except_bitmask); inline void clear_fp_errors() { if constexpr (have_errexcept()) { - std::feclearexcept(FE_ALL_EXCEPT); + // experimental results show calling std::feclearexcept is *very* + // expensive, so dont call it unless needed. + if (std::fetestexcept(FE_EXCEPT_CHECKS) > 0) + { + std::feclearexcept(FE_EXCEPT_CHECKS); + } } else if constexpr (have_errno()) { errno = 0; @@ -49,7 +56,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal { if constexpr (have_errexcept()) { - int fe_except_bitmask = std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT); + int fe_except_bitmask = std::fetestexcept(FE_EXCEPT_CHECKS); if (fe_except_bitmask != 0) { std::string msg = std::string(fname ? fname : "") + " raised floating point error(s): " + get_fe_except_string(fe_except_bitmask); @@ -76,6 +83,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal } } } + } inline void warn_on_fp_error(const char* fname = nullptr, std::ostream& os = std::cerr) diff --git a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp index 567e4f875024..f905bb7f171d 100644 --- a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp +++ b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp @@ -44,17 +44,14 @@ class NgpVector { using HostSpace = Kokkos::DefaultHostExecutionSpace; public: - NgpVector(const std::string &n) : NgpVector(n, 0) - { - } - NgpVector() : NgpVector(get_default_name()) - { - } - NgpVector(const std::string &n, size_t s) - : mSize(s), - deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), - hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) - { + virtual ~NgpVector() = default; + NgpVector(const std::string &n) : NgpVector(n, 0) {} + NgpVector() : NgpVector(get_default_name()) {} + NgpVector(const std::string &n, size_t s) + : mSize(s), + deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), + hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) + { } NgpVector(size_t s) : NgpVector(get_default_name(), s) { diff --git a/packages/stk/stk_util/stk_util/util/StridedArray.hpp b/packages/stk/stk_util/stk_util/util/StridedArray.hpp index 69881b38abeb..0e19d0de940d 100644 --- a/packages/stk/stk_util/stk_util/util/StridedArray.hpp +++ b/packages/stk/stk_util/stk_util/util/StridedArray.hpp @@ -36,7 +36,7 @@ #include #include -#include +#include "Kokkos_Macros.hpp" namespace stk { diff --git a/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp b/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp index 411c33cb176f..ab75af2e267e 100644 --- a/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp +++ b/packages/xpetra/src/Operator/Xpetra_EpetraOperator.hpp @@ -151,13 +151,13 @@ class EpetraInverseOperator : public Operator > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { XPETRA_MONITOR("EpetraOperator::getDomainMap()"); return toXpetra(op_->OperatorDomainMap()); } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { XPETRA_MONITOR("EpetraOperator::getRangeMap()"); return toXpetra(op_->OperatorRangeMap()); } diff --git a/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp b/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp index f279c037d15c..ad33f6fa4d78 100644 --- a/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp +++ b/packages/xpetra/src/Operator/Xpetra_TpetraOperator.hpp @@ -128,12 +128,12 @@ class TpetraOperator //@{ //! The Map associated with the domain of this operator, which must be compatible with X.getMap(). - virtual Teuchos::RCP > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { return Teuchos::null; } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { return Teuchos::null; } @@ -199,12 +199,12 @@ class TpetraOperator //@{ //! The Map associated with the domain of this operator, which must be compatible with X.getMap(). - virtual Teuchos::RCP > getDomainMap() const { + virtual const Teuchos::RCP > getDomainMap() const { return Teuchos::null; } //! The Map associated with the range of this operator, which must be compatible with Y.getMap(). - virtual Teuchos::RCP > getRangeMap() const { + virtual const Teuchos::RCP > getRangeMap() const { return Teuchos::null; }